Ejemplo n.º 1
0
 def __iter__(self):
     """Delegate to ``stream.iter_csv`` on ``self.path``.

     ``category`` is passed as the target. The two centroid columns are
     converted with ``int`` and every other listed column with ``float``.
     """
     int_columns = ("region-centroid-col", "region-centroid-row")
     float_columns = (
         "short-line-density-5",
         "short-line-density-2",
         "vedge-mean",
         # NOTE(review): "vegde" looks like a typo of "vedge" but presumably
         # mirrors the CSV header -- confirm before changing.
         "vegde-sd",
         "hedge-mean",
         "hedge-sd",
         "intensity-mean",
         "rawred-mean",
         "rawblue-mean",
         "rawgreen-mean",
         "exred-mean",
         "exblue-mean",
         "exgreen-mean",
         "value-mean",
         "saturation-mean",
         "hue-mean",
     )
     # Integer columns are inserted first, then the float columns.
     converters = dict.fromkeys(int_columns, int)
     converters.update(dict.fromkeys(float_columns, float))
     return stream.iter_csv(self.path, target="category", converters=converters)
Ejemplo n.º 2
0
def test_iter_csv_custom_converter():
    """Check that ``stream.iter_csv`` applies a user-supplied converter per column.

    Every column uses a converter that turns unparsable fields (here, the
    empty ones) into ``None``. No target is given, so each row is expected
    to be paired with ``None``.
    """

    def int_or_none(s):
        # int("") raises ValueError; report such fields as missing instead.
        try:
            return int(s)
        except ValueError:
            return None

    columns = ("col1", "col2", "col3")
    example = io.StringIO("col1,col2,col3\n,1,2\n5,,4\n3,1,")

    dataset = stream.iter_csv(
        example, converters=dict.fromkeys(columns, int_or_none)
    )

    expected_values = [(None, 1, 2), (5, None, 4), (3, 1, None)]
    expected = [(dict(zip(columns, values)), None) for values in expected_values]
    assert list(dataset) == expected
Ejemplo n.º 3
0
 def __iter__(self):
     """Delegate to ``stream.iter_csv`` on ``self.path``.

     ``passengers`` is the target and is converted with ``int``; ``month``
     is handed to ``parse_dates`` with the ``%Y-%m`` format.
     """
     options = {
         "target": "passengers",
         "converters": {"passengers": int},
         "parse_dates": {"month": "%Y-%m"},
     }
     return stream.iter_csv(self.path, **options)
Ejemplo n.º 4
0
 def _iter(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with ``y`` as the target.

     The delimiter, quote character and field size limit are passed
     explicitly; no converters are supplied.
     """
     csv_options = dict(
         delimiter=',',
         quotechar='"',
         field_size_limit=1_000_000,
     )
     return stream.iter_csv(self.path, target='y', **csv_options)
Ejemplo n.º 5
0
Archivo: http.py Proyecto: Leo-VK/creme
 def _iter(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with ``service`` as the target.

     The three measurement columns are converted with ``float`` and the
     target itself with ``int``.
     """
     return stream.iter_csv(
         self.path,
         target="service",
         converters={
             **dict.fromkeys(("duration", "src_bytes", "dst_bytes"), float),
             "service": int,
         },
     )
Ejemplo n.º 6
0
    def _iter(self):
        """Delegate to ``stream.iter_csv`` on ``self.path`` with ``Class`` as the target.

        Columns ``V1`` through ``V28``, ``Time`` and ``Amount`` are converted
        with ``float``; ``Class`` is converted with ``int``.
        """
        # V1..V28 first, then the three named columns, in that order.
        converters = dict.fromkeys((f"V{i}" for i in range(1, 29)), float)
        converters.update({"Class": int, "Time": float, "Amount": float})

        return stream.iter_csv(self.path, target="Class", converters=converters)
Ejemplo n.º 7
0
 def _iter(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with ``service`` as the target.

     ``duration``, ``src_bytes`` and ``dst_bytes`` are converted with
     ``float``; ``service`` with ``int``.
     """
     converters = dict.fromkeys(('duration', 'src_bytes', 'dst_bytes'), float)
     converters['service'] = int
     return stream.iter_csv(self.path, target='service', converters=converters)
Ejemplo n.º 8
0
 def __iter__(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with ``weight`` as the target.

     All four listed columns, the target included, are converted with ``int``.
     """
     int_columns = ('time', 'weight', 'chick', 'diet')
     return stream.iter_csv(
         self.path,
         target='weight',
         converters=dict.fromkeys(int_columns, int),
     )
Ejemplo n.º 9
0
 def _iter(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with ``visitors`` as the target.

     Coordinates are converted with ``float``, ``visitors`` with ``int`` and
     ``is_holiday`` with ``ast.literal_eval``. ``date`` is handed to
     ``parse_dates`` with the ``%Y-%m-%d`` format.
     """
     converters = {
         'latitude': float,
         'longitude': float,
         'visitors': int,
         'is_holiday': ast.literal_eval,
     }
     date_formats = {'date': '%Y-%m-%d'}
     return stream.iter_csv(
         self.path,
         target='visitors',
         converters=converters,
         parse_dates=date_formats,
     )
Ejemplo n.º 10
0
 def _iter(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with ``rating`` as the target.

     The file is read with a tab delimiter. ``timestamp`` and
     ``release_date`` are converted with ``int``; ``age`` and ``rating``
     with ``float``.
     """
     converters = dict.fromkeys(('timestamp', 'release_date'), int)
     converters.update(dict.fromkeys(('age', 'rating'), float))
     return stream.iter_csv(
         self.path,
         target='rating',
         converters=converters,
         delimiter='\t',
     )
Ejemplo n.º 11
0
 def __iter__(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with ``weight`` as the target.

     ``time``, ``weight``, ``chick`` and ``diet`` are all converted with ``int``.
     """
     converters = {column: int for column in ("time", "weight", "chick", "diet")}
     return stream.iter_csv(self.path, target="weight", converters=converters)
Ejemplo n.º 12
0
 def _iter(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with ``visitors`` as the target.

     ``latitude`` and ``longitude`` are converted with ``float``,
     ``visitors`` with ``int`` and ``is_holiday`` with ``ast.literal_eval``.
     ``date`` is handed to ``parse_dates`` with the ``%Y-%m-%d`` format.
     """
     options = {
         "target": "visitors",
         "converters": {
             **dict.fromkeys(("latitude", "longitude"), float),
             "visitors": int,
             "is_holiday": ast.literal_eval,
         },
         "parse_dates": {"date": "%Y-%m-%d"},
     }
     return stream.iter_csv(self.path, **options)
Ejemplo n.º 13
0
 def __iter__(self):
     """Delegate to ``stream.iter_csv`` on ``self.path``.

     ``five_thirty_eight`` is the target. ``ordinal_date`` is converted with
     ``int``; the six remaining listed columns with ``float``.
     """
     float_columns = (
         'gallup',
         'ipsos',
         'morning_consult',
         'rasmussen',
         'you_gov',
         'five_thirty_eight',
     )
     converters = {'ordinal_date': int, **dict.fromkeys(float_columns, float)}
     return stream.iter_csv(
         self.path,
         target='five_thirty_eight',
         converters=converters,
     )
Ejemplo n.º 14
0
 def _iter(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with ``rating`` as the target.

     Fields are tab-separated. The two integer columns are listed first,
     followed by the two float columns.
     """
     converters = {
         **dict.fromkeys(("timestamp", "release_date"), int),
         **dict.fromkeys(("age", "rating"), float),
     }
     return stream.iter_csv(
         self.path, target="rating", converters=converters, delimiter="\t"
     )
Ejemplo n.º 15
0
 def _iter(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with ``bikes`` as the target.

     ``clouds``, ``humidity`` and ``bikes`` are converted with ``int``;
     ``pressure``, ``temperature`` and ``wind`` with ``float``. ``moment``
     is handed to ``parse_dates`` with the ``%Y-%m-%d %H:%M:%S`` format.
     """
     converters = {
         **dict.fromkeys(('clouds', 'humidity'), int),
         **dict.fromkeys(('pressure', 'temperature', 'wind'), float),
         'bikes': int,
     }
     timestamp_formats = {'moment': '%Y-%m-%d %H:%M:%S'}
     return stream.iter_csv(
         self.path,
         target='bikes',
         converters=converters,
         parse_dates=timestamp_formats,
     )
Ejemplo n.º 16
0
 def __iter__(self):
     """Delegate to ``stream.iter_csv`` on ``self.path``.

     ``five_thirty_eight`` is passed as the target. ``ordinal_date`` is
     converted with ``int`` and the other listed columns with ``float``.
     """
     converters = {"ordinal_date": int}
     converters.update(
         dict.fromkeys(
             (
                 "gallup",
                 "ipsos",
                 "morning_consult",
                 "rasmussen",
                 "you_gov",
                 "five_thirty_eight",
             ),
             float,
         )
     )
     return stream.iter_csv(
         self.path, target="five_thirty_eight", converters=converters
     )
Ejemplo n.º 17
0
 def _iter(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with ``trip_duration`` as the target.

     ``passenger_count`` and ``trip_duration`` are converted with ``int``;
     the four coordinate columns with ``float``. ``pickup_datetime`` is
     handed to ``parse_dates`` and ``dropoff_datetime`` / ``id`` are passed
     to ``drop``.
     """
     coordinate_columns = (
         'pickup_longitude',
         'pickup_latitude',
         'dropoff_longitude',
         'dropoff_latitude',
     )
     converters = {'passenger_count': int}
     converters.update(dict.fromkeys(coordinate_columns, float))
     converters['trip_duration'] = int
     return stream.iter_csv(
         self.path,
         target='trip_duration',
         converters=converters,
         parse_dates={'pickup_datetime': '%Y-%m-%d %H:%M:%S'},
         drop=['dropoff_datetime', 'id'],
     )
Ejemplo n.º 18
0
 def _iter(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with ``bikes`` as the target.

     Integer columns: ``clouds``, ``humidity``, ``bikes``. Float columns:
     ``pressure``, ``temperature``, ``wind``. ``moment`` is handed to
     ``parse_dates`` with the ``%Y-%m-%d %H:%M:%S`` format.
     """
     converters = dict.fromkeys(("clouds", "humidity"), int)
     converters.update(dict.fromkeys(("pressure", "temperature", "wind"), float))
     converters["bikes"] = int
     options = {
         "target": "bikes",
         "converters": converters,
         "parse_dates": {"moment": "%Y-%m-%d %H:%M:%S"},
     }
     return stream.iter_csv(self.path, **options)
Ejemplo n.º 19
0
 def _iter(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with ``trip_duration`` as the target.

     The pickup/dropoff coordinates are converted with ``float``, while
     ``passenger_count`` and ``trip_duration`` are converted with ``int``.
     ``pickup_datetime`` is handed to ``parse_dates``; ``dropoff_datetime``
     and ``id`` are passed to ``drop``.
     """
     converters = {
         "passenger_count": int,
         **dict.fromkeys(
             (
                 "pickup_longitude",
                 "pickup_latitude",
                 "dropoff_longitude",
                 "dropoff_latitude",
             ),
             float,
         ),
         "trip_duration": int,
     }
     options = {
         "target": "trip_duration",
         "converters": converters,
         "parse_dates": {"pickup_datetime": "%Y-%m-%d %H:%M:%S"},
         "drop": ["dropoff_datetime", "id"],
     }
     return stream.iter_csv(self.path, **options)
Ejemplo n.º 20
0
 def __iter__(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with ``is_phishing`` as the target.

     Seven columns are converted with ``float``, two with ``int``, and the
     target becomes ``True`` exactly when its raw text equals ``'1'``.
     """

     def is_positive(raw):
         # The target is compared against the literal text '1'.
         return raw == '1'

     float_columns = (
         'empty_server_form_handler',
         'popup_window',
         'https',
         'request_from_other_domain',
         'anchor_from_other_domain',
         'is_popular',
         'long_url',
     )
     converters = dict.fromkeys(float_columns, float)
     converters.update(dict.fromkeys(('age_of_domain', 'ip_in_url'), int))
     converters['is_phishing'] = is_positive
     return stream.iter_csv(self.path, target='is_phishing', converters=converters)
Ejemplo n.º 21
0
 def _iter(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with ``class`` as the target.

     ``day`` is converted with ``int``, the other feature columns with
     ``float``, and the target becomes ``True`` exactly when its raw text
     equals ``"UP"``.
     """

     def is_up(raw):
         return raw == "UP"

     converters = {"date": float, "day": int}
     converters.update(
         dict.fromkeys(
             (
                 "period",
                 "nswprice",
                 "nswdemand",
                 "vicprice",
                 "vicdemand",
                 "transfer",
             ),
             float,
         )
     )
     converters["class"] = is_up
     return stream.iter_csv(self.path, target="class", converters=converters)
Ejemplo n.º 22
0
 def __iter__(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with ``is_phishing`` as the target.

     The float columns are registered first, then the two ``int`` columns,
     and finally the target, which is turned into a boolean by comparing
     its raw text with ``"1"``.
     """

     def parse_label(raw):
         return raw == "1"

     converters = {
         **dict.fromkeys(
             (
                 "empty_server_form_handler",
                 "popup_window",
                 "https",
                 "request_from_other_domain",
                 "anchor_from_other_domain",
                 "is_popular",
                 "long_url",
             ),
             float,
         ),
         **dict.fromkeys(("age_of_domain", "ip_in_url"), int),
         "is_phishing": parse_label,
     }
     return stream.iter_csv(self.path, target="is_phishing", converters=converters)
Ejemplo n.º 23
0
    def _iter(self):
        """Delegate to ``stream.iter_csv`` on ``self.path`` with explicit field names.

        ``fieldnames`` is ``is_signal`` followed by the feature names below.
        ``is_signal`` is the target and becomes ``True`` when its raw text
        starts with ``'1'``; every feature is converted with ``float``.
        """

        def is_signal(raw):
            return raw.startswith('1')

        features = [
            'lepton pT', 'lepton eta', 'lepton phi',
            'missing energy magnitude', 'missing energy phi',
            'jet 1 pt', 'jet 1 eta', 'jet 1 phi', 'jet 1 b-tag',
            'jet 2 pt', 'jet 2 eta', 'jet 2 phi', 'jet 2 b-tag',
            'jet 3 pt', 'jet 3 eta', 'jet 3 phi', 'jet 3 b-tag',
            'jet 4 pt', 'jet 4 eta', 'jet 4 phi', 'jet 4 b-tag',
            'm_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb'
        ]

        # The target converter is registered first, then one float per feature.
        converters = {'is_signal': is_signal}
        converters.update(dict.fromkeys(features, float))

        return stream.iter_csv(
            self.path,
            fieldnames=['is_signal'] + features,
            target='is_signal',
            converters=converters,
        )
Ejemplo n.º 24
0
 def __iter__(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with three target columns.

     The three ``*-class-flares`` columns are passed together as the target.
     Three columns are converted with ``str``; the rest, targets included,
     with ``int``.
     """
     targets = ['c-class-flares', 'm-class-flares', 'x-class-flares']
     str_columns = ('zurich-class', 'largest-spot-size', 'spot-distribution')
     int_columns = (
         'activity',
         'evolution',
         'previous-24h-flare-activity',
         'hist-complex',
         'hist-complex-this-pass',
         'area',
         'largest-spot-area',
         *targets,
     )
     converters = dict.fromkeys(str_columns, str)
     converters.update(dict.fromkeys(int_columns, int))
     return stream.iter_csv(self.path, target=targets, converters=converters)
Ejemplo n.º 25
0
    def _iter(self):
        """Delegate to ``stream.iter_csv`` on ``self.path`` with explicit field names.

        The field names are ``is_signal`` followed by the features listed
        below. ``is_signal`` is the target; its converter checks whether the
        raw text starts with ``"1"``. Each feature is converted with ``float``.
        """
        target = "is_signal"
        features = [
            "lepton pT",
            "lepton eta",
            "lepton phi",
            "missing energy magnitude",
            "missing energy phi",
            "jet 1 pt",
            "jet 1 eta",
            "jet 1 phi",
            "jet 1 b-tag",
            "jet 2 pt",
            "jet 2 eta",
            "jet 2 phi",
            "jet 2 b-tag",
            "jet 3 pt",
            "jet 3 eta",
            "jet 3 phi",
            "jet 3 b-tag",
            "jet 4 pt",
            "jet 4 eta",
            "jet 4 phi",
            "jet 4 b-tag",
            "m_jj",
            "m_jjj",
            "m_lv",
            "m_jlv",
            "m_bb",
            "m_wbb",
            "m_wwbb",
        ]

        converters = {target: lambda raw: raw.startswith("1")}
        for feature in features:
            converters[feature] = float

        return stream.iter_csv(
            self.path,
            fieldnames=[target, *features],
            target=target,
            converters=converters,
        )
Ejemplo n.º 26
0
 def _iter(self):
     """Yield the rows that ``stream.iter_csv`` reads from ``self.path``.

     The file is read tab-delimited with ``rating`` as the target. When
     ``self.unpack_user_and_item`` is truthy, ``user`` and ``item`` are
     popped out of each feature dict and yielded as a third element;
     otherwise the rows are yielded unchanged.
     """
     rows = stream.iter_csv(
         self.path,
         target="rating",
         converters={
             **dict.fromkeys(("timestamp", "release_date"), int),
             **dict.fromkeys(("age", "rating"), float),
         },
         delimiter="\t",
     )

     if not self.unpack_user_and_item:
         yield from rows
         return

     for features, rating in rows:
         # Pop ``user`` before ``item``; both leave the feature dict.
         user = features.pop("user")
         item = features.pop("item")
         yield features, rating, {"user": user, "item": item}
Ejemplo n.º 27
0
 def __iter__(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with three target columns.

     ``c-class-flares``, ``m-class-flares`` and ``x-class-flares`` form the
     target list. The first three converters are ``str``; all remaining
     ones, including those for the targets, are ``int``.
     """
     converters = {
         **dict.fromkeys(
             ("zurich-class", "largest-spot-size", "spot-distribution"), str
         ),
         **dict.fromkeys(
             (
                 "activity",
                 "evolution",
                 "previous-24h-flare-activity",
                 "hist-complex",
                 "hist-complex-this-pass",
                 "area",
                 "largest-spot-area",
                 "c-class-flares",
                 "m-class-flares",
                 "x-class-flares",
             ),
             int,
         ),
     }
     return stream.iter_csv(
         self.path,
         target=["c-class-flares", "m-class-flares", "x-class-flares"],
         converters=converters,
     )
Ejemplo n.º 28
0
 def _iter(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with ``subject`` as the target.

     Every column listed below is converted with ``float``;
     ``sessionIndex`` and ``rep`` are passed to ``drop``.
     """
     float_columns = (
         "H.period",
         "DD.period.t",
         "UD.period.t",
         "H.t",
         "DD.t.i",
         "UD.t.i",
         "H.i",
         "DD.i.e",
         "UD.i.e",
         "H.e",
         "DD.e.five",
         "UD.e.five",
         "H.five",
         "DD.five.Shift.r",
         "UD.five.Shift.r",
         "H.Shift.r",
         "DD.Shift.r.o",
         "UD.Shift.r.o",
         "H.o",
         "DD.o.a",
         "UD.o.a",
         "H.a",
         "DD.a.n",
         "UD.a.n",
         "H.n",
         "DD.n.l",
         "UD.n.l",
         "H.l",
         "DD.l.Return",
         "UD.l.Return",
         "H.Return",
     )
     return stream.iter_csv(
         self.path,
         target="subject",
         converters=dict.fromkeys(float_columns, float),
         drop=["sessionIndex", "rep"],
     )
Ejemplo n.º 29
0
 def __iter__(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with ``category`` as the target.

     ``region-centroid-col`` and ``region-centroid-row`` are converted with
     ``int``; the sixteen remaining listed columns with ``float``.
     """
     converters = {
         'region-centroid-col': int,
         'region-centroid-row': int,
     }
     for column in (
         'short-line-density-5',
         'short-line-density-2',
         'vedge-mean',
         # NOTE(review): 'vegde' looks like a typo of 'vedge' but presumably
         # mirrors the CSV header -- confirm before changing.
         'vegde-sd',
         'hedge-mean',
         'hedge-sd',
         'intensity-mean',
         'rawred-mean',
         'rawblue-mean',
         'rawgreen-mean',
         'exred-mean',
         'exblue-mean',
         'exgreen-mean',
         'value-mean',
         'saturation-mean',
         'hue-mean',
     ):
         converters[column] = float
     return stream.iter_csv(self.path, target='category', converters=converters)
Ejemplo n.º 30
0
 def _iter(self):
     """Delegate to ``stream.iter_csv`` on ``self.path`` with six label targets.

     Each label becomes ``True`` exactly when its raw text equals ``'1'``.
     The ``*_Mem40_*`` columns, the two ``BH_*PeakAmp`` columns and the
     ``BHSUM*`` columns are converted with ``float``; the two
     ``BH_*PeakBPM`` columns and ``BH_HighLowRatio`` with ``int``.
     """

     def is_one(raw):
         return raw == '1'

     # NOTE(review): several label names look misspelled ('suprised',
     # 'clam', 'aggresive'); presumably they mirror the CSV header --
     # confirm before changing.
     labels = [
         'amazed-suprised', 'happy-pleased',
         'relaxing-clam', 'quiet-still',
         'sad-lonely', 'angry-aggresive'
     ]
     mem40_columns = (
         'Mean_Acc1298_Mean_Mem40_Centroid',
         'Mean_Acc1298_Mean_Mem40_Rolloff',
         'Mean_Acc1298_Mean_Mem40_Flux',
         'Mean_Acc1298_Mean_Mem40_MFCC_0',
         'Mean_Acc1298_Mean_Mem40_MFCC_1',
         'Mean_Acc1298_Mean_Mem40_MFCC_2',
         'Mean_Acc1298_Mean_Mem40_MFCC_3',
         'Mean_Acc1298_Mean_Mem40_MFCC_4',
         'Mean_Acc1298_Mean_Mem40_MFCC_5',
         'Mean_Acc1298_Mean_Mem40_MFCC_6',
         'Mean_Acc1298_Mean_Mem40_MFCC_7',
         'Mean_Acc1298_Mean_Mem40_MFCC_8',
         'Mean_Acc1298_Mean_Mem40_MFCC_9',
         'Mean_Acc1298_Mean_Mem40_MFCC_10',
         'Mean_Acc1298_Mean_Mem40_MFCC_11',
         'Mean_Acc1298_Mean_Mem40_MFCC_12',
         'Mean_Acc1298_Std_Mem40_Centroid',
         'Mean_Acc1298_Std_Mem40_Rolloff',
         'Mean_Acc1298_Std_Mem40_Flux',
         'Mean_Acc1298_Std_Mem40_MFCC_0',
         'Mean_Acc1298_Std_Mem40_MFCC_1',
         'Mean_Acc1298_Std_Mem40_MFCC_2',
         'Mean_Acc1298_Std_Mem40_MFCC_3',
         'Mean_Acc1298_Std_Mem40_MFCC_4',
         'Mean_Acc1298_Std_Mem40_MFCC_5',
         'Mean_Acc1298_Std_Mem40_MFCC_6',
         'Mean_Acc1298_Std_Mem40_MFCC_7',
         'Mean_Acc1298_Std_Mem40_MFCC_8',
         'Mean_Acc1298_Std_Mem40_MFCC_9',
         'Mean_Acc1298_Std_Mem40_MFCC_10',
         'Mean_Acc1298_Std_Mem40_MFCC_11',
         'Mean_Acc1298_Std_Mem40_MFCC_12',
         'Std_Acc1298_Mean_Mem40_Centroid',
         'Std_Acc1298_Mean_Mem40_Rolloff',
         'Std_Acc1298_Mean_Mem40_Flux',
         'Std_Acc1298_Mean_Mem40_MFCC_0',
         'Std_Acc1298_Mean_Mem40_MFCC_1',
         'Std_Acc1298_Mean_Mem40_MFCC_2',
         'Std_Acc1298_Mean_Mem40_MFCC_3',
         'Std_Acc1298_Mean_Mem40_MFCC_4',
         'Std_Acc1298_Mean_Mem40_MFCC_5',
         'Std_Acc1298_Mean_Mem40_MFCC_6',
         'Std_Acc1298_Mean_Mem40_MFCC_7',
         'Std_Acc1298_Mean_Mem40_MFCC_8',
         'Std_Acc1298_Mean_Mem40_MFCC_9',
         'Std_Acc1298_Mean_Mem40_MFCC_10',
         'Std_Acc1298_Mean_Mem40_MFCC_11',
         'Std_Acc1298_Mean_Mem40_MFCC_12',
         'Std_Acc1298_Std_Mem40_Centroid',
         'Std_Acc1298_Std_Mem40_Rolloff',
         'Std_Acc1298_Std_Mem40_Flux',
         'Std_Acc1298_Std_Mem40_MFCC_0',
         'Std_Acc1298_Std_Mem40_MFCC_1',
         'Std_Acc1298_Std_Mem40_MFCC_2',
         'Std_Acc1298_Std_Mem40_MFCC_3',
         'Std_Acc1298_Std_Mem40_MFCC_4',
         'Std_Acc1298_Std_Mem40_MFCC_5',
         'Std_Acc1298_Std_Mem40_MFCC_6',
         'Std_Acc1298_Std_Mem40_MFCC_7',
         'Std_Acc1298_Std_Mem40_MFCC_8',
         'Std_Acc1298_Std_Mem40_MFCC_9',
         'Std_Acc1298_Std_Mem40_MFCC_10',
         'Std_Acc1298_Std_Mem40_MFCC_11',
         'Std_Acc1298_Std_Mem40_MFCC_12',
     )

     # Insertion order: labels, then the Mem40 columns, then the BH columns.
     converters = {label: is_one for label in labels}
     converters.update(dict.fromkeys(mem40_columns, float))
     converters.update({
         'BH_LowPeakAmp': float,
         'BH_LowPeakBPM': int,
         'BH_HighPeakAmp': float,
         'BH_HighPeakBPM': int,
         'BH_HighLowRatio': int,
         'BHSUM1': float,
         'BHSUM2': float,
         'BHSUM3': float,
     })
     return stream.iter_csv(self.path, target=labels, converters=converters)