def feature_vector_generator(subject, t0, t1, sq=0):
    '''Return a generator of feature vectors for subject between t0 and t1.

    Readings are fetched with querying.readings(subject, t0, t1), split
    into consecutive groups of `vector_resolution` items, and every
    complete group is converted to one vector by make_feature_vector.
    Groups that end up with fewer than `vector_resolution` usable readings
    are skipped.

    NOTE(review): `sq` is accepted but never used in this function, so no
    signal-quality filtering happens here. Confirm whether it should be
    forwarded to querying.readings.
    '''
    # get all the readings for subject between t0 and t1
    all_readings = querying.readings(subject, t0, t1)
    # group readings into lists of length `vector_resolution`
    # (presumably the itertools `grouper` recipe, which pads the last
    # group with None -- TODO confirm)
    for group in grouper(vector_resolution, all_readings):
        # Drop falsy placeholders. A list is built explicitly because on
        # Python 3 filter() returns an iterator, which has no len().
        usable = [reading for reading in group if reading]
        # throw out groups with fewer readings than our desired resolution
        if len(usable) == vector_resolution:
            yield make_feature_vector(usable)
def feature_vector_generator(task, subject, position, sessionnum="", sq=""):
    '''Return a generator of feature vectors for one task/subject/position.

    Readings are fetched with
    querybytask.readings(task, subject, position, sessionnum, sq), split
    into consecutive groups of `vector_resolution` items, and every
    complete group is converted to one vector by make_feature_vector.
    Groups that end up with fewer than `vector_resolution` usable readings
    are skipped.

    `sessionnum` and `sq` are passed through unchanged to
    querybytask.readings; any session or signal-quality filtering is
    presumably done there (not visible from this function -- confirm).
    '''
    # get all the readings matching task, subject, position and session
    all_readings = querybytask.readings(task, subject, position, sessionnum, sq)
    # group readings into lists of length `vector_resolution`
    # (presumably the itertools `grouper` recipe, which pads the last
    # group with None -- TODO confirm)
    for group in grouper(vector_resolution, all_readings):
        # Drop falsy placeholders. A list is built explicitly because on
        # Python 3 filter() returns an iterator, which has no len().
        usable = [reading for reading in group if reading]
        # throw out groups with fewer readings than our desired resolution
        if len(usable) == vector_resolution:
            yield make_feature_vector(usable)
def __init__(self, params):
    '''Copy configuration values from the `params` mapping onto the instance.

    Required keys (a missing one raises KeyError): 'size', 'root',
    'x_cols', 'train_test_split_time', 'remove_distance_outlier'.
    Optional keys (None when absent unless stated): 'train_max_time',
    'train_min_time', 'place_min_checkin' (defaults to 0),
    'place_min_last_checkin', 'place_max_first_checkin'.
    '''
    optional = params.get

    self.size = params['size']
    self.root = params['root']
    # Sampling is not useful for this dataset, so the rate is pinned to
    # 100% regardless of what `params` contains.
    self.sample_rate = 1.0
    self.x_cols = params['x_cols']

    # [data engineering]
    # -- time window settings
    self.train_test_split_time = params['train_test_split_time']
    self.train_max_time = optional('train_max_time')
    self.train_min_time = optional('train_min_time')

    # -- place_id check-in thresholds
    self.place_min_checkin = optional('place_min_checkin', 0)
    self.place_min_last_checkin = optional('place_min_last_checkin')
    self.place_max_first_checkin = optional('place_max_first_checkin')

    # -- control flags
    self.remove_distance_outlier = params['remove_distance_outlier']

    # -- helper built from the same params (project-local grouper module)
    self.grp = grouper.grouper(params)