def tprocess_created_mission(str_mission_id):
    '''
    Route the candidate of a newly created mission to the active set or to the potential anomalies

    The mission is looked up by `str_mission_id`. It is ignored unless its accuracy is flagged
    as valid and its matching images overlap. Otherwise its candidate is inserted in the active
    set (and a crawl task is started) when the mission's accuracy is at least equal to its
    accuracy threshold, or stored as a potential anomaly when it is not.

    Nothing is returned and no exception is propagated: any failure, including an unknown
    mission or a candidate with unexpected dimensions, is reported through `log_error`.
    '''
    try:
        # Look up the mission that has just been created
        mission = missions_collection.find_one({'_id': ObjectId(str_mission_id)})
        if not mission:
            raise ValueError('Mission with id {} doesn\'t exist'.format(str_mission_id))

        # The overlap check only runs when the accuracy is flagged as valid (short-circuit)
        is_usable = mission.get('is_accuracy_valid', False) and matching_has_overlap(mission)
        if not is_usable:
            log_info('Ignored mission with id {}'.format(str_mission_id))
            return

        # Extract the mission's candidate and reject it if its dimensions are unexpected
        candidate = get_candidate_from_mission(mission)
        if not has_expected_dim(candidate):
            raise ValueError('Mission with id {} candidate has invalid dimensions'.format(str_mission_id))

        # A mission below its accuracy threshold only yields a potential anomaly
        meets_threshold = mission.get('accuracy') >= mission.get('accuracy_threshold')
        if not meets_threshold:
            potential_anomaly_id = potential_anomalies_collection.insert_one(candidate).inserted_id
            log_info('Inserted potential anomaly with id {} in DB'.format(potential_anomaly_id))
            return

        # Store both projections as plain lists before inserting the candidate in the active set
        usno_projection = get_usno_projection(candidate.get('image_key'), candidate.get('usno_band'))
        candidate['usno_vector'] = usno_projection.tolist()
        panstarr_projection = get_panstarr_projection(candidate.get('image_key'), candidate.get('panstarr_band'))
        candidate['panstarr_vector'] = panstarr_projection.tolist()

        member_id = active_set_collection.insert_one(candidate).inserted_id
        log_info('Inserted member with id {} in active set'.format(member_id))

        # The active set has grown, so candidates have to be crawled again
        task = tcrawl_candidates.delay()
        log_info('Created crawl task with id: {}'.format(task.id))
    except Exception as e:
        log_error('Unable to process created mission: {}'.format(e))
def tcrawl_candidates():
    '''
    Split the potential candidates across parallel `tcompute_v` tasks and schedule the callback

    Potential candidates are generated from `range(m)` and `datasets_bands`, then cut into one
    contiguous slice per CPU. Each slice is JSON-encoded and handed to a `tcompute_v` signature;
    `thandle_compute_v_finished` receives the full JSON-encoded candidate list and is registered
    as the chord callback that runs once every parallel task has finished.

    Nothing is returned; any exception is reported through `log_error` instead of being raised.
    '''
    try:
        # Build the set of candidates to crawl
        m = 1001
        S = get_potential_candidates(range(m), datasets_bands)

        # One slice per available CPU
        num_processes = multiprocessing.cpu_count()
        chunk_size = math.floor(len(S) / num_processes)

        tasks = []
        for i in range(num_processes):
            start = chunk_size * i
            # The last slice runs to the end of `S` so the division remainder is not dropped
            stop = len(S) if i == num_processes - 1 else chunk_size * (i + 1)
            tasks.append(tcompute_v.s(json.dumps(S[start:stop])))

        # Callback executed when all the parallel tasks are finished
        callback = thandle_compute_v_finished.s(json.dumps(S))

        # Run the chord in the background
        chord(tasks, callback).delay()
    except Exception as e:
        log_error('Unable to crawl candidates: {}'.format(e))
def tcompute_v(S):
    '''
    Return a dictionary where keys represent `S` candidates encoded using their `s_id` and
    values their respective `v`

    `S` is a JSON-encoded list of candidates. For each candidate, `v` is the sum, over every
    member of the active set, of `dot(x, xi) * dot(y, yi)` where `x`/`y` are the candidate's
    USNO/PanSTARR projections and `xi`/`yi` the member's stored `usno_vector`/`panstarr_vector`.
    With an empty active set every `v` is 0.

    A candidate whose `v` cannot be computed (e.g. a missing image file) is given `float('Inf')`.
    A candidate whose `s_id` cannot be determined is logged and left out of the result, since
    there is no key to report it under.
    '''
    # Decode the potential candidates
    candidates = json.loads(S)

    # Retrieve the active set, keeping only the two stored vectors of each member
    A = list(
        active_set_collection.aggregate([{
            '$match': {}
        }, {
            '$project': {
                '_id': 0,
                'usno_vector': 1,
                'panstarr_vector': 1
            }
        }]))

    vs = {}
    for s in candidates:
        # Resolve the key in its own step: if this fails there is no valid `s_id` to write to,
        # and reusing the previous iteration's `s_id` would overwrite another candidate's `v`
        try:
            s_id = get_s_id(s)
        except Exception as e:
            log_error('Exception thrown: {}'.format(e))
            continue

        try:
            # Retrieve potential candidate's projections
            x = get_usno_projection(s.get('image_key'), s.get('usno_band'))
            y = get_panstarr_projection(s.get('image_key'), s.get('panstarr_band'))

            # Accumulate `v` over the members of the active set, starting from 0
            total = 0
            for member in A:
                xi = member.get('usno_vector')
                yi = member.get('panstarr_vector')
                total = total + np.dot(np.dot(x, xi), np.dot(y, yi))

            vs[s_id] = total
        except Exception as e:
            log_error('Exception thrown: {}'.format(e))

            # An exception might be thrown if an image file doesn't exist. If so, assume candidate
            # is infinitely unlikely to be an anomaly
            vs[s_id] = float('Inf')

    return vs
def thandle_compute_v_finished(results, S):
    '''
    Insert in DB the candidate with the lowest `v` among the results of the parallel computations

    `results` is an iterable of dictionaries mapping `s_id` to `v`, one per parallel task, and
    `S` is the JSON-encoded list of candidates those values were computed for. The candidate
    whose `s_id` holds the minimum `v` is extended with that value under the `v` key and passed
    to `insert_candidate`.

    Nothing is returned; any exception is reported through `log_error` instead of being raised.
    '''
    try:
        candidates = json.loads(S)

        # Merge the per-task dictionaries into one; on duplicate keys the later result wins
        vs = {}
        for partial in results:
            vs.update(partial)

        # Lowest `v` and the first `s_id` (in merge order) holding it
        vm = min(vs.values())
        vm_s_id = next(key for key, value in vs.items() if value == vm)

        # Attach `v` to the matching candidate before storing it
        attrs = next(candidate for candidate in candidates if get_s_id(candidate) == vm_s_id)
        attrs['v'] = vm

        candidate_id = insert_candidate(attrs)
        log_info('[vm_s_id vm id]: [{} {} {}]'.format(vm_s_id, vm, candidate_id))
    except Exception as e:
        log_error('Unable to insert candidate in DB: {}'.format(e))
def tinsert_in_active_set(potential_anomaly_as_str):
    '''
    Add a JSON-encoded potential anomaly to the active set collection

    The decoded anomaly is extended with its USNO and PanSTARR projections, stored as plain
    lists under `usno_vector` and `panstarr_vector`, and then inserted in the active set.

    Nothing is returned; any exception is reported through `log_error` instead of being raised.
    '''
    try:
        member = json.loads(potential_anomaly_as_str)

        # Pre-compute both projections so they are stored alongside the member
        usno_projection = get_usno_projection(member.get('image_key'), member.get('usno_band'))
        member['usno_vector'] = usno_projection.tolist()

        panstarr_projection = get_panstarr_projection(member.get('image_key'), member.get('panstarr_band'))
        member['panstarr_vector'] = panstarr_projection.tolist()

        # Insert the 'anomaly' in the active set
        member_id = active_set_collection.insert_one(member).inserted_id
        log_info('Inserted member with id {} in active set'.format(member_id))
    except Exception as e:
        log_error('Unable to insert potential anomaly in the active set: {}'.format(e))