예제 #1
0
    def __init__(self, context):
        """
        Generate a random numpy masked-array column and its Python twin.

        A random (non-void) column type and a random missing-value fraction
        are drawn, ``random_column`` produces ``self.frame.nrows`` values
        plus a mask, and the pair is wrapped into a numpy masked array.
        The plain Python list gets ``None`` written into every masked
        position and is stored alongside the numpy objects.

        Parameters
        ----------
        context
            Forwarded to the parent initializer. About half of the time it
            also receives a deferred check built from the numpy array.
        """
        super().__init__(context)

        # "void" columns have no direct numpy counterpart, so that type is
        # excluded from the random draw.
        col_type = random_type(allow_void=False)
        missing_fraction = random.random()
        values, mask = random_column(self.frame.nrows,
                                     col_type,
                                     missing_fraction=missing_fraction,
                                     missing_nones=False)

        # numpy's default integer is np.int64 on Linux/Mac but np.int32 on
        # Windows; pin it to np.int64 so results agree across platforms.
        if col_type == int:
            dtype = np.int64
        else:
            dtype = np.dtype(col_type)
        masked_array = np.ma.array(values, mask=mask, dtype=dtype)

        # Some arrays are registered for a later check so that we can verify
        # munging never alters them. The ones that are not registered may be
        # garbage-collected by Python, which exercises datatable's behaviour
        # in that situation as well.
        keep_for_check = random.random() > 0.5
        if keep_for_check:
            context.add_deferred_check(deferred_nparray_check(masked_array))

        # Mirror the mask in the Python data: masked entries become None.
        for idx in range(len(values)):
            if not mask[idx]:
                continue
            values[idx] = None

        self.column_name = random_names(1)[0]
        self.column_type = col_type
        self.np_dtype = dtype
        self.np_data = masked_array
        self.np_mask = mask
        self.py_data = values
예제 #2
0
    def cbind_numpy_column(self):
        """
        Column-bind one randomly generated numpy column to this frame.

        A random column type and missing-value fraction are drawn,
        ``random_column`` produces ``self.nrows`` values plus a mask, and the
        resulting numpy masked array is turned into a single-column
        ``dt.Frame`` that is cbind-ed onto ``self.df``. The bookkeeping
        lists ``self.data``, ``self.types`` and ``self.names`` are extended
        to match, and ``self.dedup_names()`` is called at the end.
        """
        import numpy as np
        col_type = random_type()
        missing_fraction = random.random()
        values, mask = random_column(self.nrows, col_type,
                                     missing_fraction, False)

        # numpy's default integer is np.int64 on Linux/Mac but np.int32 on
        # Windows; pin it to np.int64 so results agree across platforms.
        if col_type == int:
            dtype = np.int64
        else:
            dtype = np.dtype(col_type)
        masked_array = np.ma.array(values, mask=mask, dtype=dtype)

        # Half of the arrays are retained (with a deep copy for comparison)
        # so we can verify munging never alters them. The others may be
        # garbage-collected by Python, which exercises datatable's behaviour
        # in that situation as well.
        retain_array = random.random() > 0.5
        if retain_array:
            self.np_data += [masked_array]
            self.np_data_deepcopy += [copy.deepcopy(masked_array)]

        new_names = random_names(1)
        column_frame = dt.Frame(masked_array.T, names=new_names)

        # Mirror the mask in the Python data: masked entries become None.
        masked_rows = [row for row in range(self.nrows) if mask[row]]
        for row in masked_rows:
            values[row] = None

        self.df.cbind(column_frame)
        self.data += [values]
        self.types += [col_type]
        self.names += new_names
        self.dedup_names()
예제 #3
0
 def random(ncols=None,
            nrows=None,
            types=None,
            names=None,
            missing_fraction=None):
     """
     Create a MetaFrame populated with randomly generated columns.

     Every argument that is omitted is chosen here:

     * ``ncols`` -- length of ``types`` if given, otherwise length of
       ``names`` if given, otherwise ``int(random.expovariate(0.2)) + 1``.
     * ``nrows`` -- ``int(random.expovariate(0.01)) + 1``.
     * ``types`` -- one ``random_type()`` per column.
     * ``names`` -- ``random_names(ncols)``.
     * ``missing_fraction`` -- ``random.random()**10``, reset to 0.0 when
       it is below 0.05 or when the frame has a single row.

     A description of the frame and the ``dt.Frame(...)`` call that
     reproduces it are printed to stdout.

     Returns the new MetaFrame with ``data``, ``names``, ``types``,
     ``nkeys`` (set to 0) and ``df`` filled in.

     Raises AssertionError if the resolved arguments have the wrong type
     or the lengths of ``types``/``names`` differ from ``ncols``, and
     KeyError if ``types`` holds anything other than bool, int, float, str.
     """
     if ncols is None:
         # Take the width from whichever per-column list was supplied.
         sized_by = types or names
         if sized_by:
             ncols = len(sized_by)
         else:
             ncols = int(random.expovariate(0.2)) + 1
     if nrows is None:
         nrows = int(random.expovariate(0.01)) + 1
     if not types:
         types = [random_type() for _ in range(ncols)]
     if not names:
         names = random_names(ncols)
     if missing_fraction is None:
         drawn = random.random()**10
         if drawn < 0.05 or nrows == 1:
             missing_fraction = 0.0
         else:
             missing_fraction = drawn

     assert isinstance(ncols, int)
     assert isinstance(nrows, int)
     assert isinstance(types, list) and len(types) == ncols
     assert isinstance(names, list) and len(names) == ncols
     assert isinstance(missing_fraction, float)

     # Tally how many columns of each supported type were requested.
     type_counts = dict.fromkeys((bool, int, float, str), 0)
     for coltype in types:
         type_counts[coltype] += 1
     assert len(type_counts) == 4

     counts = tuple(type_counts[kind] for kind in (bool, int, float, str))
     print("# Making a frame with nrows=%d, ncols=%d" % (nrows, ncols))
     print("#   types: bool=%d, int=%d, float=%d, str=%d" % counts)
     print("#   missing values: %.3f" % missing_fraction)

     # Only the values returned by random_column are kept; the rest of its
     # return value is discarded.
     data = []
     for col in range(ncols):
         generated = random_column(nrows, types[col], missing_fraction)
         data.append(generated[0])

     frame = MetaFrame()
     frame.data = data
     frame.names = names
     frame.types = types
     frame.nkeys = 0
     frame.df = dt.Frame(data, names=names, stypes=types)

     # Echo the equivalent constructor call so the frame can be recreated.
     print(f"{frame.name} = dt.Frame(")
     print(f"    {repr_data(data, 4)},")
     print(f"    names={names},")
     print(f"    stypes={repr_types(types)}")
     print(")")
     return frame