def get_score(self, a, b, dset_store, database_connection, neighbors_cache, xgb_model):

    if a not in self.cache:
        self.cache[a] = {}

    if b not in self.cache[a]:

        (neighbors50, neighbors100, neighbors200, neighbors300) = neighbors_cache.get_distances(
            a, b.timestamp, dset_store, database_connection)

        euclidian_distance = aux.euclidian_distance(a.position, b.position)
        frames_gap = a.timestamp.frames_difference(b.timestamp) - 1

        d = b   # detection
        ld = a  # last detection (before detection)

        d_distinct_ids = list(set(d.candidate_ids))
        ld_distinct_ids = list(set(ld.candidate_ids))

        score_sum = 0

        for d_can_id, d_can_score, d_can_orientation in d_distinct_ids:
            for ld_can_id, ld_can_score, ld_can_orientation in ld_distinct_ids:

                # hamming distance: int
                hamming_distance = aux.hamming_distance(ld_can_id, d_can_id)

                # difference of orientation angle to last detection of path, in radians
                o_change = abs(ld_can_orientation - d_can_orientation)
                if o_change > np.pi:
                    o_change = 2 * np.pi - o_change

                # looking from the last detection towards its orientation,
                # what is the angle to the position of the next detection, in radians
                o_to_next = np.arctan2(d.position[1] - ld.position[1],
                                       d.position[0] - ld.position[0])
                o_deviation = abs(ld_can_orientation - o_to_next)
                if o_deviation > np.pi:
                    o_deviation = 2 * np.pi - o_deviation

                data_point = [
                    frames_gap, euclidian_distance,
                    neighbors50, neighbors100, neighbors200, neighbors300,
                    hamming_distance, o_change, o_deviation,
                    d_can_score, ld_can_score
                ]

                xgb_data = xgb.DMatrix(np.array([data_point]))
                score_sum += xgb_model.predict(xgb_data)

        score = score_sum * 1.0 / (len(d_distinct_ids) * len(ld_distinct_ids))
        self.cache[a][b] = score

    return self.cache[a][b]
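# The orientation features above (o_change, o_deviation) wrap an absolute angle
# difference into [0, pi]. A minimal standalone sketch of that computation, for
# reference only; the helper name wrap_angle_difference is hypothetical and not
# part of this codebase.
import numpy as np

def wrap_angle_difference(angle_a, angle_b):
    """Absolute difference of two angles in radians, wrapped into [0, pi]."""
    diff = abs(angle_a - angle_b)
    if diff > np.pi:
        diff = 2 * np.pi - diff
    return diff

# example: angles just on either side of the +/- pi boundary are close together
assert abs(wrap_angle_difference(3.04, -3.04) - (2 * np.pi - 6.08)) < 1e-9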
def start(self):

    self.dset_store.clear()
    self.path_manager.clear()

    start_timestamp = ds.TimeStamp(config.FRAME_START, config.CAM)
    duration = config.FRAME_END - config.FRAME_START + 1

    previous_timestamp = start_timestamp
    for x in range(config.FRAME_START + 1, config.FRAME_END + 1):
        timestamp = ds.TimeStamp(x, config.CAM)
        timestamp.connect_with_previous(previous_timestamp)
        previous_timestamp = timestamp

    print 'start generation'
    print (
        ' date = ' + str(config.DATE[0]) + '/' + str(config.DATE[1]) + '/' + str(config.DATE[2])
        + ', time = ' + str(config.TIME[0]) + ':' + str(config.TIME[1])
    )
    print ' cam = ' + str(config.CAM) + ', frames = ' + str(config.FRAME_START) + ' til ' + str(config.FRAME_END)

    database_connection = db.Connection()

    timestamp = start_timestamp

    hammings = np.zeros(13, dtype=np.int)  # by Hamming distance 0-12
    gaps = {}

    for x in range(0, duration):

        print 'processing timestamp ' + timestamp.time_name

        dset = self.dset_store.get(timestamp, database_connection)

        for d in dset.detections:

            truth_id = database_connection.get_truth_id(d)
            if truth_id is not None:

                hamming_dis = aux.hamming_distance(truth_id, d.decoded_mean)
                hammings[hamming_dis] += 1

                path = self.path_manager.get_path(truth_id)
                gap = path.add_detection_and_return_gap(d)
                if gap is not None:
                    gap = min(gap, 16)  # greater or equal 16 not listed separately
                    if gap not in gaps:
                        gaps[gap] = 0
                    gaps[gap] += 1

        timestamp = timestamp.get_next()
        if timestamp is None:
            break

    print 'generation finished'
    print '--------------------------------'

    detection_count = sum(hammings)
    hammings_percentage = hammings * 1.0 / detection_count

    print 'validation finished'
    print '\nhammings result:'
    print np.round(hammings_percentage * 100, 1)
    print '\ndetection count:'
    print detection_count
    print '\ngaps:'
    print [g for g in sorted(gaps.items())]
    print 'mean gap length: ' + str(sum([l * n for l, n in gaps.items()]) * 1.0 / sum([n for l, n in gaps.items()]))
    print 'gaps count: ' + str(sum([n for l, n in gaps.items()]))
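# aux.hamming_distance above is project code that is not shown here. A standalone
# sketch of the same idea, under the assumption that decoded tag IDs are either
# plain integers encoding the 12-bit tag (hence the 13 histogram bins for
# distances 0-12) or equal-length 0/1 sequences; both helper names are
# hypothetical.
import numpy as np

def hamming_distance_int(id_a, id_b):
    """Number of differing bits between two integer-coded tag IDs."""
    return bin(id_a ^ id_b).count('1')

def hamming_distance_bits(bits_a, bits_b):
    """Number of differing entries between two equal-length 0/1 sequences."""
    return int(np.count_nonzero(np.asarray(bits_a) != np.asarray(bits_b)))

# example: 0b101100 and 0b100101 differ in two bit positions
assert hamming_distance_int(0b101100, 0b100101) == 2
assert hamming_distance_bits([1, 0, 1, 1, 0, 0], [1, 0, 0, 1, 0, 1]) == 2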
def start(self):

    self.path_manager.clear()
    self.dset_store.clear()

    timestamp = config.START_TIMESTAMP
    duration = config.FRAMES_DURATION

    print 'start generation'
    print ' host = ' + config.DB_HOST + ', date = ' + timestamp.date_name + ', cam = ' + str(timestamp.cam)
    print ' start time = ' + timestamp.time_name + ', duration = ' + str(duration) + ' frames'

    database_connection = db.Connection()

    if not timestamp.exists(database_connection):
        database_connection.close()
        print 'timestamp ' + timestamp.time_name + ' not found'
        print 'generation stopped'
        print '--------------------------------'
        return

    best_candidates = np.zeros(13, dtype=np.int)  # by Hamming distance 0-12
    all_candidates = np.zeros(13, dtype=np.int)   # by Hamming distance 0-12
    gaps = {}

    for x in range(0, duration):

        print 'processing timestamp ' + timestamp.time_name

        dset = self.dset_store.get(timestamp, database_connection)

        for d in dset.detections:

            truth_id = database_connection.get_truth_id(d)
            if truth_id is not None:

                candidates = list(set([c[0] for c in d.candidate_ids]))
                hammings = [aux.hamming_distance(truth_id, c) for c in candidates]
                hammings.sort()

                best_candidates[hammings[0]] += 1
                for h in hammings:
                    all_candidates[h] += 1

                path = self.path_manager.get_path(truth_id)
                gap = path.add_detection_and_return_gap(d)
                if gap is not None:
                    gap = min(gap, 16)  # greater or equal 16 not listed separately
                    if gap not in gaps:
                        gaps[gap] = 0
                    gaps[gap] += 1

        timestamp = timestamp.get_next()
        if timestamp is None:
            break

    database_connection.close()

    print 'generation finished'
    print '--------------------------------'

    best_candidates_count = sum(best_candidates)
    all_candidates_count = sum(all_candidates)
    best_candidates_percentage = best_candidates * 1.0 / best_candidates_count
    all_candidates_percentage = all_candidates * 1.0 / all_candidates_count

    print 'validation finished'
    print '\nhammings result:'
    print np.round(best_candidates_percentage * 100, 1)
    print np.round(all_candidates_percentage * 100, 1)
    print '\ngaps:'
    print [g for g in sorted(gaps.items())]
    print 'mean gap length: ' + str(sum([l * n for l, n in gaps.items()]) * 1.0 / sum([n for l, n in gaps.items()]))
def xgboost_learning(path, dset):

    SCORE_THRESHOLD = 5000

    mset = ds.MatchSet()

    last_unempty_detection = ld = path.get_sorted_unempty_detections()[-1]

    frames_gap = ld.timestamp.frames_difference(dset.detections[0].timestamp) - 1

    # Number of detections within a radius of 50, 100, 200 or 300
    neighbors50 = 0
    neighbors100 = 0
    neighbors200 = 0
    neighbors300 = 0

    for d in dset.detections:
        euclidian_distance = aux.euclidian_distance(ld.position, d.position)
        if euclidian_distance <= 50:
            neighbors50 += 1
        if euclidian_distance <= 100:
            neighbors100 += 1
        if euclidian_distance <= 200:
            neighbors200 += 1
        if euclidian_distance <= 300:
            neighbors300 += 1

    for d in dset.detections:

        # euclidian distance
        #euclidian_distance_squared = aux.squared_distance( ld.position, d.position )  # int
        euclidian_distance = aux.euclidian_distance(ld.position, d.position)  # float

        # hamming distance: int
        hamming_distance = aux.hamming_distance(ld.decoded_mean, d.decoded_mean)

        bit_distances = np.abs(np.subtract(d.decoded_id, ld.decoded_id))
        max_bit_distance = np.max(bit_distances)
        mean_bit_distance = np.mean(bit_distances)

        confidence = np.min(np.abs(0.5 - d.decoded_id) * 2)

        x_rotation_difference = abs(d.x_rotation - ld.x_rotation)
        if x_rotation_difference > np.pi:
            x_rotation_difference = 2 * np.pi - x_rotation_difference

        y_rotation_difference = abs(d.y_rotation - ld.y_rotation)
        if y_rotation_difference > np.pi:
            y_rotation_difference = 2 * np.pi - y_rotation_difference

        z_rotation_difference = abs(d.z_rotation - ld.z_rotation)
        if z_rotation_difference > np.pi:
            z_rotation_difference = 2 * np.pi - z_rotation_difference

        data_point = [
            str(frames_gap),
            "%.1f" % euclidian_distance,
            str(neighbors50),
            str(neighbors100),
            str(neighbors200),
            str(neighbors300),
            str(hamming_distance),
            "%.2f" % max_bit_distance,
            "%.2f" % mean_bit_distance,
            "%.2f" % confidence,
            "%.2f" % d.localizer_saliency,
            "%.2f" % ld.localizer_saliency,
            "%.2f" % abs(d.localizer_saliency - ld.localizer_saliency),
            "%.2f" % x_rotation_difference,
            "%.2f" % y_rotation_difference,
            "%.2f" % z_rotation_difference,
        ]

        xgb_data = xgb.DMatrix(np.array([data_point]))
        score = int(round((1 - XGB_MODEL.predict(xgb_data)) * 10000))

        if score <= SCORE_THRESHOLD:
            mset.append((d, score))

    mset.sort()
    mset.truncate(MATCHSET_SIZE)

    return mset
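# The score above converts the booster's predicted match probability into an
# integer cost in [0, 10000], so SCORE_THRESHOLD = 5000 keeps candidates whose
# predicted probability of being a match is roughly 0.5 or higher. A minimal
# sketch of that mapping; probability_to_score is a hypothetical helper name.
def probability_to_score(match_probability):
    return int(round((1 - match_probability) * 10000))

assert probability_to_score(1.0) == 0       # certain match -> best (lowest) score
assert probability_to_score(0.5) == 5000    # threshold boundary
assert probability_to_score(0.0) == 10000   # certain non-match -> worst score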
def start(self):

    self.path_manager.clear()
    self.dset_store.clear()

    start_timestamp = ds.TimeStamp(config.FRAME_START, config.CAM)
    duration = config.FRAME_END - config.FRAME_START + 1

    previous_timestamp = start_timestamp
    for x in range(config.FRAME_START + 1, config.FRAME_END + 1):
        timestamp = ds.TimeStamp(x, config.CAM)
        timestamp.connect_with_previous(previous_timestamp)
        previous_timestamp = timestamp

    print 'start generation'
    print (
        ' date = ' + str(config.DATE[0]) + '/' + str(config.DATE[1]) + '/' + str(config.DATE[2])
        + ', time = ' + str(config.TIME[0]) + ':' + str(config.TIME[1])
    )
    print ' cam = ' + str(config.CAM) + ', frames = ' + str(config.FRAME_START) + ' til ' + str(config.FRAME_END)

    database_connection = db.Connection()

    timestamp = start_timestamp
    datalines_written = 0

    if os.path.isfile('dataset.csv'):
        os.remove('dataset.csv')

    with open('dataset.csv', 'a') as my_file:

        data_output = [
            'frames-gap', 'euclidian-distance',
            'neighbors-50', 'neighbors-100', 'neighbors-200', 'neighbors-300',
            'hamming-distance', 'max_bit_distance', 'mean_bit_distance', 'confidence',
            'detection1_saliency', 'detection2_saliency', 'saliency_difference',
            'x_rotation_difference', 'y_rotation_difference', 'z_rotation_difference',
            'match'
        ]
        my_file.write(', '.join(data_output) + '\n')

        for x in range(0, duration):

            print 'processing timestamp ' + timestamp.time_name

            dset = self.dset_store.get(timestamp, database_connection)

            for path_bee_id, truth_path in self.path_manager.paths.iteritems():

                last_detection = ld = truth_path.get_last_detection()

                # frames gap: integer. 0 = no blank gap = one frame difference to next
                frames_gap = last_detection.timestamp.frames_difference(timestamp) - 1

                # Number of detections within a radius of 50, 100, 200 or 300
                neighbors50 = 0
                neighbors100 = 0
                neighbors200 = 0
                neighbors300 = 0

                for d in dset.detections:
                    euclidian_distance = aux.euclidian_distance(ld.position, d.position)
                    if euclidian_distance <= 50:
                        neighbors50 += 1
                    if euclidian_distance <= 100:
                        neighbors100 += 1
                    if euclidian_distance <= 200:
                        neighbors200 += 1
                    if euclidian_distance <= 300:
                        neighbors300 += 1

                for d in dset.detections:

                    truth_id = database_connection.get_truth_id(d)
                    match = int(truth_id == path_bee_id)  # whether the detection belongs to the path

                    if frames_gap < FRAMES_GAP_LIMIT and (match or OUTPUT_NONMATCHING):

                        # euclidian distance
                        #euclidian_distance_squared = aux.squared_distance( ld.position, d.position )  # int
                        euclidian_distance = aux.euclidian_distance(ld.position, d.position)  # float

                        # hamming distance: int
                        hamming_distance = aux.hamming_distance(ld.decoded_mean, d.decoded_mean)

                        bit_distances = np.abs(np.subtract(d.decoded_id, ld.decoded_id))
                        max_bit_distance = np.max(bit_distances)
                        mean_bit_distance = np.mean(bit_distances)

                        confidence = np.min(np.abs(0.5 - d.decoded_id) * 2)

                        x_rotation_difference = abs(d.x_rotation - ld.x_rotation)
                        if x_rotation_difference > np.pi:
                            x_rotation_difference = 2 * np.pi - x_rotation_difference

                        y_rotation_difference = abs(d.y_rotation - ld.y_rotation)
                        if y_rotation_difference > np.pi:
                            y_rotation_difference = 2 * np.pi - y_rotation_difference

                        z_rotation_difference = abs(d.z_rotation - ld.z_rotation)
                        if z_rotation_difference > np.pi:
                            z_rotation_difference = 2 * np.pi - z_rotation_difference

                        data_output = [
                            str(frames_gap),
                            "%.1f" % euclidian_distance,
                            str(neighbors50),
                            str(neighbors100),
                            str(neighbors200),
                            str(neighbors300),
                            str(hamming_distance),
                            "%.2f" % max_bit_distance,
                            "%.2f" % mean_bit_distance,
                            "%.2f" % confidence,
                            "%.2f" % d.localizer_saliency,
                            "%.2f" % ld.localizer_saliency,
                            "%.2f" % abs(d.localizer_saliency - ld.localizer_saliency),
                            "%.2f" % x_rotation_difference,
                            "%.2f" % y_rotation_difference,
                            "%.2f" % z_rotation_difference,
                            str(match)
                        ]

                        #print data_output
                        my_file.write(', '.join(data_output) + '\n')
                        datalines_written += 1

            for d in dset.detections:
                truth_id = database_connection.get_truth_id(d)
                if truth_id is not None:
                    truth_path = self.path_manager.get_path(truth_id)
                    truth_path.add_detection(d)

            timestamp = timestamp.get_next()
            if timestamp is None:
                break

    print 'generation finished'
    print '--------------------------------'
    print str(datalines_written) + ' lines written to dataset.csv'
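# How dataset.csv is turned into the trained XGB_MODEL used by xgboost_learning
# is not shown in this code. A minimal training sketch, assuming the CSV has the
# header written above with 'match' as the last column and that a binary
# logistic objective is used; the file name 'xgb.model' and the parameter values
# are illustrative assumptions, not taken from the project.
import numpy as np
import xgboost as xgb

data = np.genfromtxt('dataset.csv', delimiter=',', skip_header=1)
features, labels = data[:, :-1], data[:, -1]

dtrain = xgb.DMatrix(features, label=labels)
params = {'objective': 'binary:logistic', 'max_depth': 6, 'eta': 0.3}
booster = xgb.train(params, dtrain, num_boost_round=100)
booster.save_model('xgb.model')

# loading it back for prediction, as get_score / xgboost_learning expect:
XGB_MODEL = xgb.Booster(model_file='xgb.model')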
def xgboost_learning(path, dset):

    SCORE_THRESHOLD = 5000

    mset = ds.MatchSet()

    last_unempty_match = path.get_sorted_unempty_matches()[-1]
    last_detection = ld = last_unempty_match.detection

    # frames gap: integer. 0 = no blank gap = one frame difference to next
    frames_gap = last_detection.timestamp.frames_difference(dset.detections[0].timestamp) - 1

    # Number of detections within a radius of 50, 100, 200 or 300
    neighbors50 = 0
    neighbors100 = 0
    neighbors200 = 0
    neighbors300 = 0

    for d in dset.detections:
        euclidian_distance = aux.euclidian_distance(ld.position, d.position)
        if euclidian_distance <= 50:
            neighbors50 += 1
        if euclidian_distance <= 100:
            neighbors100 += 1
        if euclidian_distance <= 200:
            neighbors200 += 1
        if euclidian_distance <= 300:
            neighbors300 += 1

    for d in dset.detections:

        # euclidian distance
        euclidian_distance = aux.euclidian_distance(ld.position, d.position)  # float

        d_distinct_ids = list(set(d.candidate_ids))
        ld_distinct_ids = list(set(ld.candidate_ids))

        for d_can_id, d_can_score, d_can_orientation in d_distinct_ids:
            for ld_can_id, ld_can_score, ld_can_orientation in ld_distinct_ids:

                # hamming distance: int
                hamming_distance = aux.hamming_distance(ld_can_id, d_can_id)

                # difference of orientation angle to last detection of path, in radians
                o_change = abs(ld_can_orientation - d_can_orientation)
                if o_change > np.pi:
                    o_change = 2 * np.pi - o_change

                # looking from the last detection towards its orientation,
                # what is the angle to the position of the next detection, in radians
                o_to_next = np.arctan2(d.position[1] - ld.position[1],
                                       d.position[0] - ld.position[0])
                o_deviation = abs(ld_can_orientation - o_to_next)
                if o_deviation > np.pi:
                    o_deviation = 2 * np.pi - o_deviation

                data_point = [
                    frames_gap, euclidian_distance,
                    neighbors50, neighbors100, neighbors200, neighbors300,
                    hamming_distance, o_change, o_deviation,
                    d_can_score, ld_can_score
                ]

                xgb_data = xgb.DMatrix(np.array([data_point]))
                score = int(round((1 - XGB_MODEL.predict(xgb_data)) * 10000))

                if score <= SCORE_THRESHOLD:
                    mset.append((ds.Match(d, d_can_id), score))

    mset.sort()
    mset.truncate(MATCHSET_SIZE)

    return mset
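# The four neighbor counts above measure how crowded the area around the last
# detection of the path is. A standalone sketch of the same computation; the
# function name count_neighbors is hypothetical.
import numpy as np

def count_neighbors(last_position, positions, radii=(50, 100, 200, 300)):
    """For each radius, count how many positions lie within that distance of last_position."""
    positions = np.asarray(positions, dtype=float)
    distances = np.sqrt(np.sum((positions - np.asarray(last_position, dtype=float)) ** 2, axis=1))
    return [int(np.sum(distances <= r)) for r in radii]

# example: detections at distances 10, 60 and 400 from the last detection
print(count_neighbors((0, 0), [(10, 0), (0, 60), (400, 0)]))  # [1, 2, 2, 2]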
def start(self):

    self.path_manager.clear()
    self.dset_store.clear()

    timestamp = config.START_TIMESTAMP
    duration = config.FRAMES_DURATION

    print 'start generation'
    print ' host = ' + config.DB_HOST + ', date = ' + timestamp.date_name + ', cam = ' + str(timestamp.cam)
    print ' start time = ' + timestamp.time_name + ', duration = ' + str(duration) + ' frames'

    database_connection = db.Connection()

    if not timestamp.exists(database_connection):
        database_connection.close()
        print 'timestamp ' + timestamp.time_name + ' not found'
        print 'generation stopped'
        print '--------------------------------'
        return

    datalines_written = 0

    if os.path.isfile('dataset.csv'):
        os.remove('dataset.csv')

    with open('dataset.csv', 'a') as my_file:

        data_output = [
            'frames-gap', 'euclidian-distance',
            'neighbors-50', 'neighbors-100', 'neighbors-200', 'neighbors-300',
            'hamming-distance', 'hamming-distance-path',
            'orientation-change', 'orientation-deviation',
            'detection1_candidate-score', 'detection2_candidate-score',
            'match'
        ]
        my_file.write(', '.join(data_output) + '\n')

        for x in range(0, duration):

            print 'processing timestamp ' + timestamp.time_name

            dset = self.dset_store.get(timestamp, database_connection)

            for path_bee_id, truth_path in self.path_manager.paths.iteritems():

                last_detection = ld = truth_path.get_last_detection()

                # frames gap: integer. 0 = no blank gap = one frame difference to next
                frames_gap = last_detection.timestamp.frames_difference(timestamp) - 1

                # Number of detections within a radius of 50, 100, 200 or 300
                neighbors50 = 0
                neighbors100 = 0
                neighbors200 = 0
                neighbors300 = 0

                for d in dset.detections:
                    euclidian_distance = aux.euclidian_distance(ld.position, d.position)
                    if euclidian_distance <= 50:
                        neighbors50 += 1
                    if euclidian_distance <= 100:
                        neighbors100 += 1
                    if euclidian_distance <= 200:
                        neighbors200 += 1
                    if euclidian_distance <= 300:
                        neighbors300 += 1

                for d in dset.detections:

                    truth_id = database_connection.get_truth_id(d)
                    match = int(truth_id == path_bee_id)  # whether the detection belongs to the path

                    if frames_gap < FRAMES_GAP_LIMIT and (match or OUTPUT_NONMATCHING):

                        # euclidian distance
                        #euclidian_distance_squared = aux.squared_distance( ld.position, d.position )  # int
                        euclidian_distance = aux.euclidian_distance(ld.position, d.position)  # float

                        d_distinct_ids = list(set(d.candidate_ids))
                        ld_distinct_ids = list(set(ld.candidate_ids))

                        for d_can_id, d_can_score, d_can_orientation in d_distinct_ids:
                            for ld_can_id, ld_can_score, ld_can_orientation in ld_distinct_ids:

                                # hamming distance: int
                                hamming_distance = aux.hamming_distance(ld_can_id, d_can_id)

                                # hamming distance path: float
                                hamming_distance_path = truth_path.fast_average_hamming_distance(d_can_id)

                                # difference of orientation angle to last detection of path, in radians
                                o_change = abs(ld_can_orientation - d_can_orientation)
                                if o_change > np.pi:
                                    o_change = 2 * np.pi - o_change

                                # looking from the last detection towards its orientation,
                                # what is the angle to the position of the next detection, in radians
                                o_to_next = np.arctan2(d.position[1] - ld.position[1],
                                                       d.position[0] - ld.position[0])
                                o_deviation = abs(ld_can_orientation - o_to_next)
                                if o_deviation > np.pi:
                                    o_deviation = 2 * np.pi - o_deviation

                                data_output = [
                                    str(frames_gap),
                                    "%.1f" % euclidian_distance,
                                    str(neighbors50),
                                    str(neighbors100),
                                    str(neighbors200),
                                    str(neighbors300),
                                    str(hamming_distance),
                                    "%.2f" % hamming_distance_path,
                                    "%.3f" % o_change,
                                    "%.3f" % o_deviation,
                                    str(d_can_score),
                                    str(ld_can_score),
                                    str(match)
                                ]

                                my_file.write(', '.join(data_output) + '\n')
                                datalines_written += 1

            for d in dset.detections:
                truth_id = database_connection.get_truth_id(d)
                if truth_id is not None:
                    truth_path = self.path_manager.get_path(truth_id)
                    truth_path.add_detection(d)

            timestamp = timestamp.get_next(database_connection)
            if timestamp is None:
                break

    database_connection.close()

    print 'generation finished'
    print '--------------------------------'
    print str(datalines_written) + ' lines written to dataset.csv'