def __iter__(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    The ``category`` column is passed as the target. The two centroid
    columns are converted with ``int`` and every other listed column with
    ``float``.
    """
    int_columns = ("region-centroid-col", "region-centroid-row")
    # NOTE(review): "vegde-sd" is reproduced exactly as originally spelled;
    # presumably it mirrors the CSV header -- confirm before "fixing" it.
    float_columns = (
        "short-line-density-5",
        "short-line-density-2",
        "vedge-mean",
        "vegde-sd",
        "hedge-mean",
        "hedge-sd",
        "intensity-mean",
        "rawred-mean",
        "rawblue-mean",
        "rawgreen-mean",
        "exred-mean",
        "exblue-mean",
        "exgreen-mean",
        "value-mean",
        "saturation-mean",
        "hue-mean",
    )
    converters = {
        **dict.fromkeys(int_columns, int),
        **dict.fromkeys(float_columns, float),
    }
    return stream.iter_csv(self.path, target="category", converters=converters)
def test_iter_csv_custom_converter():
    """Check that ``stream.iter_csv`` applies a user-supplied converter.

    Each of the three columns has one empty cell; the converter maps a cell
    that ``int`` rejects to ``None``. No target is given, and the expected
    second element of every yielded pair is ``None``.
    """

    def parse_optional_int(raw):
        # An empty cell makes ``int`` raise ValueError; report it as None.
        try:
            return int(raw)
        except ValueError:
            return None

    columns = ("col1", "col2", "col3")
    buffer = io.StringIO("col1,col2,col3\n,1,2\n5,,4\n3,1,")

    rows = list(
        stream.iter_csv(buffer, converters=dict.fromkeys(columns, parse_optional_int))
    )

    expected_features = [
        {"col1": None, "col2": 1, "col3": 2},
        {"col1": 5, "col2": None, "col3": 4},
        {"col1": 3, "col2": 1, "col3": None},
    ]
    assert rows == [(features, None) for features in expected_features]
def __iter__(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    ``passengers`` is both the target column and the only converted column
    (``int``); ``month`` is parsed as a date with the ``%Y-%m`` format.
    """
    target_column = "passengers"
    date_formats = {"month": "%Y-%m"}
    return stream.iter_csv(
        self.path,
        target=target_column,
        converters={target_column: int},
        parse_dates=date_formats,
    )
def _iter(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    The target column is ``y``. The delimiter, quote character and field
    size limit are passed explicitly.
    """
    csv_options = {
        'target': 'y',
        'delimiter': ',',
        'quotechar': '"',
        'field_size_limit': 1_000_000,
    }
    return stream.iter_csv(self.path, **csv_options)
def _iter(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    ``service`` is the target and is converted with ``int``; the three
    remaining listed columns are converted with ``float``.
    """
    return stream.iter_csv(
        self.path,
        target="service",
        converters={
            **dict.fromkeys(("duration", "src_bytes", "dst_bytes"), float),
            "service": int,
        },
    )
def _iter(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    Columns ``V1`` through ``V28``, ``Time`` and ``Amount`` are converted
    with ``float``; the target column ``Class`` is converted with ``int``.
    """
    component_columns = (f"V{index}" for index in range(1, 29))
    converters = {
        **dict.fromkeys(component_columns, float),
        "Class": int,
        "Time": float,
        "Amount": float,
    }
    return stream.iter_csv(self.path, target="Class", converters=converters)
def _iter(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    ``service`` is the target and is converted with ``int``; ``duration``,
    ``src_bytes`` and ``dst_bytes`` are converted with ``float``.
    """
    float_columns = ('duration', 'src_bytes', 'dst_bytes')
    converters = dict.fromkeys(float_columns, float)
    converters['service'] = int
    return stream.iter_csv(self.path, target='service', converters=converters)
def __iter__(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    ``weight`` is the target; all four listed columns are converted with
    ``int``.
    """
    int_columns = ('time', 'weight', 'chick', 'diet')
    return stream.iter_csv(
        self.path,
        target='weight',
        converters=dict.fromkeys(int_columns, int),
    )
def _iter(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    ``visitors`` is the target (``int``). Coordinates are converted with
    ``float``, ``is_holiday`` is evaluated with ``ast.literal_eval`` and
    ``date`` is parsed with the ``%Y-%m-%d`` format.
    """
    converters = {
        **dict.fromkeys(('latitude', 'longitude'), float),
        'visitors': int,
        # The raw cell is handed to literal_eval as-is.
        'is_holiday': ast.literal_eval,
    }
    date_formats = {'date': '%Y-%m-%d'}
    return stream.iter_csv(
        self.path,
        target='visitors',
        converters=converters,
        parse_dates=date_formats,
    )
def _iter(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    The file is read with a tab delimiter. ``rating`` is the target;
    ``timestamp`` and ``release_date`` are converted with ``int``, ``age``
    and ``rating`` with ``float``.
    """
    converters = {
        **dict.fromkeys(('timestamp', 'release_date'), int),
        **dict.fromkeys(('age', 'rating'), float),
    }
    return stream.iter_csv(
        self.path,
        target='rating',
        converters=converters,
        delimiter='\t',
    )
def __iter__(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    ``weight`` is the target; ``time``, ``weight``, ``chick`` and ``diet``
    are each converted with ``int``.
    """
    converters = {}
    for column in ("time", "weight", "chick", "diet"):
        converters[column] = int
    return stream.iter_csv(self.path, target="weight", converters=converters)
def _iter(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    ``visitors`` is the target and is converted with ``int``; ``latitude``
    and ``longitude`` use ``float``; ``is_holiday`` is passed through
    ``ast.literal_eval``; ``date`` is parsed with ``%Y-%m-%d``.
    """
    options = {
        "target": "visitors",
        "converters": {
            "latitude": float,
            "longitude": float,
            "visitors": int,
            "is_holiday": ast.literal_eval,
        },
        "parse_dates": {"date": "%Y-%m-%d"},
    }
    return stream.iter_csv(self.path, **options)
def __iter__(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    ``five_thirty_eight`` is the target. ``ordinal_date`` is converted with
    ``int``; every other listed column, the target included, with ``float``.
    """
    float_columns = (
        'gallup',
        'ipsos',
        'morning_consult',
        'rasmussen',
        'you_gov',
        'five_thirty_eight',
    )
    converters = {'ordinal_date': int, **dict.fromkeys(float_columns, float)}
    return stream.iter_csv(
        self.path,
        target='five_thirty_eight',
        converters=converters,
    )
def _iter(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    Tab-delimited input with ``rating`` as the target. ``timestamp`` and
    ``release_date`` are converted with ``int``; ``age`` and ``rating``
    with ``float``.
    """
    int_columns = ("timestamp", "release_date")
    float_columns = ("age", "rating")
    converters = dict.fromkeys(int_columns, int)
    converters.update(dict.fromkeys(float_columns, float))
    return stream.iter_csv(
        self.path, target="rating", converters=converters, delimiter="\t"
    )
def _iter(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    ``bikes`` is the target (``int``). ``clouds`` and ``humidity`` are
    converted with ``int``; ``pressure``, ``temperature`` and ``wind`` with
    ``float``. ``moment`` is parsed with ``%Y-%m-%d %H:%M:%S``.
    """
    converters = {
        **dict.fromkeys(('clouds', 'humidity'), int),
        **dict.fromkeys(('pressure', 'temperature', 'wind'), float),
        'bikes': int,
    }
    timestamp_formats = {'moment': '%Y-%m-%d %H:%M:%S'}
    return stream.iter_csv(
        self.path,
        target='bikes',
        converters=converters,
        parse_dates=timestamp_formats,
    )
def __iter__(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    The target column is ``five_thirty_eight``. ``ordinal_date`` uses
    ``int``; the five other listed columns and the target use ``float``.
    """
    target_column = "five_thirty_eight"
    converters = {"ordinal_date": int}
    for column in (
        "gallup",
        "ipsos",
        "morning_consult",
        "rasmussen",
        "you_gov",
        target_column,
    ):
        converters[column] = float
    return stream.iter_csv(self.path, target=target_column, converters=converters)
def _iter(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    ``trip_duration`` is the target (``int``). ``passenger_count`` is
    converted with ``int`` and the four coordinate columns with ``float``.
    ``pickup_datetime`` is parsed with ``%Y-%m-%d %H:%M:%S``, and the
    ``dropoff_datetime`` and ``id`` columns are passed as ``drop``.
    """
    coordinate_columns = (
        'pickup_longitude',
        'pickup_latitude',
        'dropoff_longitude',
        'dropoff_latitude',
    )
    converters = {
        'passenger_count': int,
        **dict.fromkeys(coordinate_columns, float),
        'trip_duration': int,
    }
    return stream.iter_csv(
        self.path,
        target='trip_duration',
        converters=converters,
        parse_dates={'pickup_datetime': '%Y-%m-%d %H:%M:%S'},
        drop=['dropoff_datetime', 'id'],
    )
def _iter(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    Target column: ``bikes``. ``clouds``, ``humidity`` and ``bikes`` are
    converted with ``int``; ``pressure``, ``temperature`` and ``wind`` with
    ``float``. ``moment`` is parsed using ``%Y-%m-%d %H:%M:%S``.
    """
    options = {
        "target": "bikes",
        "converters": {
            "clouds": int,
            "humidity": int,
            "pressure": float,
            "temperature": float,
            "wind": float,
            "bikes": int,
        },
        "parse_dates": {"moment": "%Y-%m-%d %H:%M:%S"},
    }
    return stream.iter_csv(self.path, **options)
def _iter(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    Target column: ``trip_duration`` (``int``). ``passenger_count`` uses
    ``int``; pickup/dropoff longitude and latitude use ``float``.
    ``pickup_datetime`` is parsed with ``%Y-%m-%d %H:%M:%S``;
    ``dropoff_datetime`` and ``id`` are listed in ``drop``.
    """
    converters = {"passenger_count": int}
    for endpoint in ("pickup", "dropoff"):
        for axis in ("longitude", "latitude"):
            converters[f"{endpoint}_{axis}"] = float
    converters["trip_duration"] = int

    dropped_columns = ["dropoff_datetime", "id"]
    date_formats = {"pickup_datetime": "%Y-%m-%d %H:%M:%S"}
    return stream.iter_csv(
        self.path,
        target="trip_duration",
        converters=converters,
        parse_dates=date_formats,
        drop=dropped_columns,
    )
def __iter__(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    ``is_phishing`` is the target; its raw string becomes ``True`` exactly
    when it equals ``'1'``. Seven columns are converted with ``float`` and
    ``age_of_domain`` / ``ip_in_url`` with ``int``.
    """

    def equals_one(raw):
        return raw == '1'

    float_columns = (
        'empty_server_form_handler',
        'popup_window',
        'https',
        'request_from_other_domain',
        'anchor_from_other_domain',
        'is_popular',
        'long_url',
    )
    converters = {
        **dict.fromkeys(float_columns, float),
        **dict.fromkeys(('age_of_domain', 'ip_in_url'), int),
        'is_phishing': equals_one,
    }
    return stream.iter_csv(self.path, target='is_phishing', converters=converters)
def _iter(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    ``class`` is the target; its raw string becomes ``True`` exactly when
    it equals ``"UP"``. ``day`` is converted with ``int`` and the other
    listed columns with ``float``.
    """

    def is_up(raw):
        return raw == "UP"

    trailing_float_columns = (
        "period",
        "nswprice",
        "nswdemand",
        "vicprice",
        "vicdemand",
        "transfer",
    )
    converters = {
        "date": float,
        "day": int,
        **dict.fromkeys(trailing_float_columns, float),
        "class": is_up,
    }
    return stream.iter_csv(self.path, target="class", converters=converters)
def __iter__(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    Target column: ``is_phishing``, converted to the boolean
    ``raw == "1"``. The first seven listed columns use ``float``;
    ``age_of_domain`` and ``ip_in_url`` use ``int``.
    """
    target_column = "is_phishing"
    converters = {}
    for column in (
        "empty_server_form_handler",
        "popup_window",
        "https",
        "request_from_other_domain",
        "anchor_from_other_domain",
        "is_popular",
        "long_url",
    ):
        converters[column] = float
    for column in ("age_of_domain", "ip_in_url"):
        converters[column] = int
    converters[target_column] = lambda raw: raw == "1"
    return stream.iter_csv(self.path, target=target_column, converters=converters)
def _iter(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    Field names are supplied explicitly: ``is_signal`` first, then the 28
    feature names. ``is_signal`` is the target and becomes ``True`` when
    its raw string starts with ``'1'``; every feature uses ``float``.
    """
    jet_columns = [
        f'jet {number} {quantity}'
        for number in range(1, 5)
        for quantity in ('pt', 'eta', 'phi', 'b-tag')
    ]
    feature_names = [
        'lepton pT',
        'lepton eta',
        'lepton phi',
        'missing energy magnitude',
        'missing energy phi',
        *jet_columns,
        'm_jj',
        'm_jjj',
        'm_lv',
        'm_jlv',
        'm_bb',
        'm_wbb',
        'm_wwbb',
    ]

    def starts_with_one(raw):
        return raw.startswith('1')

    converters = {'is_signal': starts_with_one}
    converters.update(dict.fromkeys(feature_names, float))
    return stream.iter_csv(
        self.path,
        fieldnames=['is_signal'] + feature_names,
        target='is_signal',
        converters=converters,
    )
def __iter__(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    Three flare-count columns are passed together as the target list and
    converted with ``int``. Three descriptive columns are converted with
    ``str`` and seven further columns with ``int``.
    """
    flare_targets = ['c-class-flares', 'm-class-flares', 'x-class-flares']
    text_columns = ('zurich-class', 'largest-spot-size', 'spot-distribution')
    count_columns = (
        'activity',
        'evolution',
        'previous-24h-flare-activity',
        'hist-complex',
        'hist-complex-this-pass',
        'area',
        'largest-spot-area',
    )
    converters = {
        **dict.fromkeys(text_columns, str),
        **dict.fromkeys(count_columns, int),
        **dict.fromkeys(flare_targets, int),
    }
    return stream.iter_csv(self.path, target=flare_targets, converters=converters)
def _iter(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    ``fieldnames`` is given explicitly as ``is_signal`` followed by the
    feature names. ``is_signal`` is the target, converted to
    ``raw.startswith("1")``; each feature is converted with ``float``.
    """
    label = "is_signal"
    lepton_and_energy = (
        "lepton pT",
        "lepton eta",
        "lepton phi",
        "missing energy magnitude",
        "missing energy phi",
    )
    jets = tuple(
        f"jet {index} {suffix}"
        for index in (1, 2, 3, 4)
        for suffix in ("pt", "eta", "phi", "b-tag")
    )
    masses = ("m_jj", "m_jjj", "m_lv", "m_jlv", "m_bb", "m_wbb", "m_wwbb")
    feature_names = [*lepton_and_energy, *jets, *masses]

    converters = {label: lambda raw: raw.startswith("1")}
    for name in feature_names:
        converters[name] = float

    return stream.iter_csv(
        self.path,
        fieldnames=[label, *feature_names],
        target=label,
        converters=converters,
    )
def _iter(self):
    """Yield the pairs produced by ``stream.iter_csv``, optionally unpacked.

    The tab-delimited file at ``self.path`` is read with ``rating`` as the
    target. When ``self.unpack_user_and_item`` is falsy the pairs are
    yielded unchanged. Otherwise ``user`` and ``item`` are popped from each
    feature mapping and yielded as a third element:
    ``(x, y, {"user": ..., "item": ...})``.
    """
    converters = {
        **dict.fromkeys(("timestamp", "release_date"), int),
        **dict.fromkeys(("age", "rating"), float),
    }
    samples = stream.iter_csv(
        self.path, target="rating", converters=converters, delimiter="\t"
    )

    if not self.unpack_user_and_item:
        yield from samples
        return

    for features, rating in samples:
        # Pop "user" before "item", mutating the mapping in place.
        user = features.pop("user")
        item = features.pop("item")
        yield features, rating, {"user": user, "item": item}
def __iter__(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    The target is a list of three flare-count columns, each converted with
    ``int``. ``zurich-class``, ``largest-spot-size`` and
    ``spot-distribution`` are converted with ``str``; the remaining listed
    columns with ``int``.
    """
    targets = [f"{letter}-class-flares" for letter in ("c", "m", "x")]

    converters = {}
    for column in ("zurich-class", "largest-spot-size", "spot-distribution"):
        converters[column] = str
    for column in (
        "activity",
        "evolution",
        "previous-24h-flare-activity",
        "hist-complex",
        "hist-complex-this-pass",
        "area",
        "largest-spot-area",
        *targets,
    ):
        converters[column] = int

    return stream.iter_csv(self.path, target=targets, converters=converters)
def _iter(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    ``subject`` is the target, ``sessionIndex`` and ``rep`` are passed as
    ``drop``, and all 31 timing columns are converted with ``float``.
    """
    # The column names follow a regular pattern over this key sequence:
    # "H.<key>" for each key, plus "DD.<key>.<next>" and "UD.<key>.<next>"
    # for each consecutive pair of keys.
    keys = ("period", "t", "i", "e", "five", "Shift.r", "o", "a", "n", "l", "Return")

    timing_columns = []
    for current, following in zip(keys, keys[1:]):
        timing_columns.append(f"H.{current}")
        timing_columns.append(f"DD.{current}.{following}")
        timing_columns.append(f"UD.{current}.{following}")
    # The last key has no successor, so it contributes only its "H." column.
    timing_columns.append(f"H.{keys[-1]}")

    return stream.iter_csv(
        self.path,
        target="subject",
        converters=dict.fromkeys(timing_columns, float),
        drop=["sessionIndex", "rep"],
    )
def __iter__(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    Target column: ``category``. ``region-centroid-col`` and
    ``region-centroid-row`` are converted with ``int``; all other listed
    columns with ``float``.
    """
    converters = {}
    for column in ('region-centroid-col', 'region-centroid-row'):
        converters[column] = int
    # NOTE(review): 'vegde-sd' is kept exactly as originally spelled;
    # presumably it matches the CSV header -- verify before changing it.
    for column in (
        'short-line-density-5',
        'short-line-density-2',
        'vedge-mean',
        'vegde-sd',
        'hedge-mean',
        'hedge-sd',
        'intensity-mean',
        'rawred-mean',
        'rawblue-mean',
        'rawgreen-mean',
        'exred-mean',
        'exblue-mean',
        'exgreen-mean',
        'value-mean',
        'saturation-mean',
        'hue-mean',
    ):
        converters[column] = float
    return stream.iter_csv(self.path, target='category', converters=converters)
def _iter(self):
    """Return the iterator built by ``stream.iter_csv`` over ``self.path``.

    Six label columns are passed together as the target list; each label's
    raw string becomes ``True`` exactly when it equals ``'1'``. The
    spectral columns are converted with ``float`` and the ``BH*`` columns
    with ``float`` or ``int`` as listed.
    """
    # NOTE(review): label spellings ('suprised', 'clam', 'aggresive') are
    # reproduced exactly; presumably they match the CSV header -- confirm
    # before changing them.
    labels = [
        'amazed-suprised',
        'happy-pleased',
        'relaxing-clam',
        'quiet-still',
        'sad-lonely',
        'angry-aggresive',
    ]

    def equals_one(raw):
        return raw == '1'

    # Each prefix is combined with the same 16 suffixes:
    # Centroid, Rolloff, Flux, MFCC_0 .. MFCC_12.
    prefixes = (
        'Mean_Acc1298_Mean_Mem40',
        'Mean_Acc1298_Std_Mem40',
        'Std_Acc1298_Mean_Mem40',
        'Std_Acc1298_Std_Mem40',
    )
    suffixes = ['Centroid', 'Rolloff', 'Flux']
    suffixes.extend(f'MFCC_{index}' for index in range(13))

    converters = dict.fromkeys(labels, equals_one)
    converters.update(
        (f'{prefix}_{suffix}', float)
        for prefix in prefixes
        for suffix in suffixes
    )
    converters.update({
        'BH_LowPeakAmp': float,
        'BH_LowPeakBPM': int,
        'BH_HighPeakAmp': float,
        'BH_HighPeakBPM': int,
        'BH_HighLowRatio': int,
        'BHSUM1': float,
        'BHSUM2': float,
        'BHSUM3': float,
    })
    return stream.iter_csv(self.path, target=labels, converters=converters)