def get_roc_auc(labels, score, verbose=True):
    """Return area under ROC curve

    Parameters
    ----------
    labels : np.ndarray
        vector of ground truth
    score : np.ndarray
        vector of scores assigned by classifier
        (i.e. clf.predict_proba(...)[:, -1] in sklearn)
    verbose : boolean
        iff True, prints area under the curve

    Returns
    -------
    float
        area under the curve

    """
    labels = utils.check_col(labels, argument_name='labels')
    score = utils.check_col(score, argument_name='score')
    auc_score = roc_auc_score(labels, score)
    if verbose:
        print 'ROC AUC: {}'.format(auc_score)
    return auc_score
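# A minimal usage sketch (hypothetical, not part of the original module);
# assumes the module-level imports above. For this toy data sklearn's
# roc_auc_score returns 0.75, since one negative is ranked above one positive.
def _example_get_roc_auc():
    labels = np.array([0, 0, 1, 1])
    scores = np.array([0.1, 0.4, 0.35, 0.8])
    return get_roc_auc(labels, scores, verbose=False)  # -> 0.75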
def plot_prec_recall(labels, score, title='Prec/Recall', verbose=True):
    """Plot precision/recall curve

    Parameters
    ----------
    labels : np.ndarray
        vector of ground truth
    score : np.ndarray
        vector of scores assigned by classifier
        (i.e. clf.predict_proba(...)[:, -1] in sklearn)
    title : str
        title of plot
    verbose : boolean
        iff True, display the graph

    Returns
    -------
    matplotlib.figure.Figure
        Figure containing plot

    """
    labels = utils.check_col(labels, argument_name='labels')
    score = utils.check_col(score, argument_name='score')

    # adapted from Rayid's prec/recall code
    y_true = labels
    y_score = score
    precision_curve, recall_curve, pr_thresholds = precision_recall_curve(
        y_true,
        y_score)
    precision_curve = precision_curve[:-1]
    recall_curve = recall_curve[:-1]
    pct_above_per_thresh = []
    number_scored = len(y_score)
    for value in pr_thresholds:
        num_above_thresh = len(y_score[y_score >= value])
        pct_above_thresh = num_above_thresh / float(number_scored)
        pct_above_per_thresh.append(pct_above_thresh)
    pct_above_per_thresh = np.array(pct_above_per_thresh)
    fig = plt.figure()
    ax1 = plt.gca()
    ax1.plot(pct_above_per_thresh, precision_curve, 'b')
    ax1.set_xlabel('percent of population')
    ax1.set_ylabel('precision', color='b')
    ax2 = ax1.twinx()
    ax2.plot(pct_above_per_thresh, recall_curve, 'r')
    ax2.set_ylabel('recall', color='r')
    plt.title(title)
    if verbose:
        fig.show()
    return fig
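# A minimal usage sketch (hypothetical): plots precision (left axis, blue)
# and recall (right axis, red) against percent of population for a toy
# score vector.
def _example_plot_prec_recall():
    labels = np.array([0, 0, 1, 1, 1, 0])
    scores = np.array([0.1, 0.4, 0.35, 0.8, 0.65, 0.2])
    return plot_prec_recall(labels, scores, title='toy example',
                            verbose=False)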
def test_check_col(self):
    # check_col should accept 1-d arrays, single-column 2-d arrays,
    # lists, and pandas Series, returning an ndarray in each case
    valid1 = np.array([1, 2, 3, 4])
    valid2 = np.array([[1.0], [2], [3], [4]])
    valid3 = [3.0, 2.0, 1.8]
    valid4 = pd.Series(valid1)
    for valid in (valid1, valid2, valid3, valid4):
        self.assertTrue(utils.is_nd(utils.check_col(valid)))
    # non-column inputs should raise ValueError
    self.assertRaises(ValueError, utils.check_col, None)
    self.assertRaises(ValueError, utils.check_col, "lalala")
    self.assertRaises(ValueError, utils.check_col,
                      np.array([[1, 2], [3, 4]]))
    # n_rows must match the length of the column
    utils.check_col(valid1, n_rows=4)
    self.assertRaises(ValueError, utils.check_col, valid1, n_rows=5)
def normalize(col, mean=None, stddev=None, return_fit=False):
    """Generate a normalized column.

    Normalize both mean and std dev.

    Parameters
    ----------
    col : np.ndarray
    mean : float or None
        Mean to use for fit. If None, the mean of col is used.
    stddev : float or None
        Standard deviation to use for fit. If None, the standard
        deviation of col is used.
    return_fit : boolean
        If True, returns tuple of fitted col, mean, and standard dev of
        fit. If False, only returns fitted col

    Returns
    -------
    np.ndarray or (np.ndarray, float, float)

    """
    # see infonavit for applying to a different set than we fit on
    # https://github.com/dssg/infonavit-public/blob/master/pipeline_src/preprocessing.py#L99
    # Logic is from sklearn StandardScaler, but I didn't use sklearn because
    # I want to pass in mean and stddev rather than a fitted StandardScaler
    # https://github.com/scikit-learn/scikit-learn/blob/a95203b/sklearn/preprocessing/data.py#L276
    col = utils.check_col(col)
    if mean is None:
        mean = np.mean(col)
    if stddev is None:
        stddev = np.std(col)
    res = (col - mean) / stddev
    if return_fit:
        return (res, mean, stddev)
    else:
        return res
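# A minimal sketch (hypothetical) of the fit-then-apply pattern the
# infonavit link above refers to: fit on a training column with
# return_fit=True, then reuse the returned mean and stddev on a test column.
def _example_normalize_train_test(train_col, test_col):
    train_norm, fit_mean, fit_stddev = normalize(train_col, return_fit=True)
    test_norm = normalize(test_col, mean=fit_mean, stddev=fit_stddev)
    return train_norm, test_norm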
def plot_on_timeline(col, verbose=True):
    """Plots points on a timeline

    Parameters
    ----------
    col : np.array
    verbose : boolean
        iff True, display the graph

    Returns
    -------
    matplotlib.figure.Figure
        Figure containing plot

    """
    col = utils.check_col(col)
    # http://stackoverflow.com/questions/1574088/plotting-time-in-python-with-matplotlib
    if is_nd(col):
        col = col.astype(datetime)
    dates = matplotlib.dates.date2num(col)
    fig = plt.figure()
    plt.plot_date(dates, [0] * len(dates))
    if verbose:
        plt.show()
    return fig
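# A minimal usage sketch (hypothetical; assumes check_col passes datetime64
# columns through): plot a handful of dates on a timeline. The
# .astype(datetime) call above converts np.datetime64 values to objects
# matplotlib.dates.date2num understands.
def _example_plot_on_timeline():
    dates = np.array(['2015-01-01', '2015-02-15', '2015-03-30'],
                     dtype='datetime64[D]')
    return plot_on_timeline(dates, verbose=False)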
def plot_box_plot(col, title=None, verbose=True):
    """Makes a box plot for a feature

    Parameters
    ----------
    col : np.array
    title : str or None
        title of a plot
    verbose : boolean
        iff True, display the graph

    Returns
    -------
    matplotlib.figure.Figure
        Figure containing plot

    """
    col = utils.check_col(col)
    fig = plt.figure()
    plt.boxplot(col)
    if title:
        plt.title(title)
    # TODO add col_name to graph
    if verbose:
        plt.show()
    return fig
def plot_simple_histogram(col, verbose=True):
    """Makes a histogram of values in a column

    Parameters
    ----------
    col : np.ndarray
    verbose : boolean
        iff True, display the graph

    Returns
    -------
    matplotlib.figure.Figure
        Figure containing plot

    """
    col = utils.check_col(col)
    override_xticks = False
    if col.dtype.char in ('O', 'S'):
        # If col is strings, handle differently: one bar per category
        counts = Counter(col)
        categories = sorted(counts.keys())
        hist = [counts[cat] for cat in categories]
        bins = np.arange(len(categories) + 1)
        override_xticks = True
    else:
        hist, bins = np.histogram(col, bins=50)
    width = 0.7 * (bins[1] - bins[0])
    center = (bins[:-1] + bins[1:]) / 2
    f = plt.figure()
    plt.bar(center, hist, align='center', width=width)
    if override_xticks:
        plt.xticks(center, categories)
    if verbose:
        plt.show()
    return f
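# A minimal usage sketch (hypothetical): the dtype check above means numeric
# columns get 50 numeric bins while string columns get one bar per category.
def _example_plot_simple_histogram():
    numeric_fig = plot_simple_histogram(np.random.randn(1000), verbose=False)
    categorical_fig = plot_simple_histogram(
        np.array(['a', 'b', 'a', 'c', 'a']),
        verbose=False)
    return numeric_fig, categorical_fig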
def crosstab(col1, col2, verbose=True):
    """
    Makes a crosstab of col1 and col2. This is represented as a
    structured array with the following properties:

    1. The first column is the value of col1 being crossed
    2. The name of every column except the first is the value of col2
       being crossed
    3. To find the number of co-occurrences of x from col1 and y in col2,
       find the row that has 'x' in col1 and the column named 'y'. The
       corresponding cell is the number of co-occurrences of x and y

    Parameters
    ----------
    col1 : np.ndarray
    col2 : np.ndarray
    verbose : boolean
        iff True, pretty-print the resulting structured array

    Returns
    -------
    np.ndarray
        structured array

    """
    col1 = utils.check_col(col1, argument_name='col1')
    col2 = utils.check_col(col2, argument_name='col2')
    col1_unique = np.unique(col1)
    col2_unique = np.unique(col2)
    crosstab_rows = []
    for col1_val in col1_unique:
        loc_col1_val = np.where(col1 == col1_val)[0]
        col2_vals = col2[loc_col1_val]
        cnt = Counter(col2_vals)
        # Counter returns 0 for missing keys, so no has_key check is needed
        counts = [cnt[col2_val] for col2_val in col2_unique]
        crosstab_rows.append(['{}'.format(col1_val)] + counts)
    col_names = ['col1_value'] + ['{}'.format(col2_val)
                                  for col2_val in col2_unique]
    ret = convert_to_sa(crosstab_rows, col_names=col_names)
    if verbose:
        pprint_sa(ret)
    return ret
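# A minimal usage sketch (hypothetical). For the columns below the result
# has one row per unique value of col1 ('0' and '1') and, after the leading
# 'col1_value' column, one count column per unique value of col2
# ('a' and 'b').
def _example_crosstab():
    col1 = np.array([0, 0, 1, 1, 1])
    col2 = np.array(['a', 'b', 'a', 'a', 'b'])
    return crosstab(col1, col2, verbose=False)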
def distance_from_point(lat_origin, lng_origin, lat_col, lng_col):
    """Generates a column of how far each record is from the origin

    Parameters
    ----------
    lat_origin : number
    lng_origin : number
    lat_col : np.ndarray
    lng_col : np.ndarray

    Returns
    -------
    np.ndarray

    """
    lat_col = utils.check_col(lat_col, argument_name='lat_col')
    lng_col = utils.check_col(lng_col, argument_name='lng_col')
    return distance(lat_origin, lng_origin, lat_col, lng_col)
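# A minimal usage sketch (hypothetical): distance of two records from a
# fixed origin, given latitude and longitude columns. The units of the
# result are whatever the distance() helper returns.
def _example_distance_from_point():
    lats = np.array([41.98, 41.77])
    lngs = np.array([-87.90, -87.75])
    return distance_from_point(41.88, -87.63, lats, lngs)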
def __init__(
        self,
        M,
        labels,
        clfs=[{'clf': RandomForestClassifier}],
        subsets=[{'subset': s_i.SubsetNoSubset}],
        cvs=[{'cv': KFold}],
        trials=None):
    if M is not None:
        if utils.is_nd(M) and not utils.is_sa(M):
            # nd_array; short-circuit the usual type checking and coercion
            if M.ndim != 2:
                raise ValueError('Expected 2-dimensional array for M')
            self.M = M
            self.col_names = ['f{}'.format(i) for i in xrange(M.shape[1])]
            self.labels = utils.check_col(
                labels,
                n_rows=M.shape[0],
                argument_name='labels')
        else:
            # M is either a structured array or something that should
            # be converted
            (M, self.labels) = utils.check_consistent(
                M,
                labels,
                col_argument_name='labels')
            self.col_names = M.dtype.names
            self.M = utils.cast_np_sa_to_nd(M)
    else:
        self.col_names = None
    if trials is None:
        clfs = utils.check_arguments(
            clfs,
            {'clf': lambda clf: issubclass(clf, BaseEstimator)},
            optional_keys_take_lists=True,
            argument_name='clfs')
        subsets = utils.check_arguments(
            subsets,
            {'subset': lambda subset: issubclass(subset,
                                                 s_i.BaseSubsetIter)},
            optional_keys_take_lists=True,
            argument_name='subsets')
        cvs = utils.check_arguments(
            cvs,
            {'cv': lambda cv: issubclass(cv, _PartitionIterator)},
            optional_keys_take_lists=True,
            argument_name='cvs')
    self.clfs = clfs
    self.subsets = subsets
    self.cvs = cvs
    self.trials = trials
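# A minimal construction sketch (hypothetical; assumes this __init__ belongs
# to a class named Experiment, and that optional_keys_take_lists means extra
# keys in each clf dict take lists of candidate values). Each entry in clfs
# pairs a 'clf' class with keyword arguments to try:
def _example_experiment(M, labels):
    return Experiment(
        M,
        labels,
        clfs=[{'clf': RandomForestClassifier, 'max_depth': [1, 10]}])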
def plot_kernel_density(col, verbose=True):
    """Plots kernel density function of column

    From:
    https://jakevdp.github.io/blog/2013/12/01/kernel-density-estimation/

    Parameters
    ----------
    col : np.ndarray
    verbose : boolean
        iff True, display the graph

    Returns
    -------
    matplotlib.figure.Figure
        Figure containing plot

    """
    # TODO address passing an entire matrix
    # TODO respect missing_val
    # TODO what does n do?
    col = utils.check_col(col)
    x_grid = np.linspace(min(col), max(col), 1000)

    grid = GridSearchCV(
        KernelDensity(),
        {'bandwidth': np.linspace(0.1, 1.0, 30)},
        cv=20)  # 20-fold cross-validation
    grid.fit(col[:, None])
    kde = grid.best_estimator_

    pdf = np.exp(kde.score_samples(x_grid[:, None]))

    fig, ax = plt.subplots()
    ax.plot(x_grid, pdf, linewidth=3, alpha=0.5,
            label='bw=%.2f' % kde.bandwidth)
    ax.hist(col, 30, fc='gray', histtype='stepfilled', alpha=0.3,
            normed=True)
    ax.legend(loc='upper left')
    ax.set_xlim(min(col), max(col))
    if verbose:
        plt.show()
    return fig
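# A minimal usage sketch (hypothetical): a bimodal sample, where the
# cross-validated bandwidth search above should pick a bandwidth that
# resolves both modes rather than smoothing them into one.
def _example_plot_kernel_density():
    col = np.concatenate([np.random.normal(-2, 0.5, 200),
                          np.random.normal(2, 0.5, 200)])
    return plot_kernel_density(col, verbose=False)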
def table(col, verbose=True):
    """
    Creates a summary of the number of occurrences of each value in the
    column. Similar to R's table

    Parameters
    ----------
    col : np.ndarray
    verbose : boolean
        iff True, pretty-print the resulting structured array

    Returns
    -------
    np.ndarray
        structured array

    """
    col = utils.check_col(col)
    cnt = Counter(col)
    cat_and_cnt = sorted(cnt.iteritems(), key=lambda item: item[0])
    ret = convert_to_sa(cat_and_cnt, col_names=('col_name', 'count'))
    if verbose:
        pprint_sa(ret)
    return ret
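# A minimal usage sketch (hypothetical). For the column below, the result
# pairs each distinct value with its count, like R's table(): 1 appears
# twice, 2 once, and 3 three times.
def _example_table():
    col = np.array([1, 2, 3, 3, 1, 3])
    return table(col, verbose=False)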
def generate_bin(col, num_bins):
    """Generates a column of categories, where each category is a bin.

    Parameters
    ----------
    col : np.ndarray
    num_bins : int
        number of bins to divide the range of col into

    Returns
    -------
    list of int

    Examples
    --------
    >>> M = np.array([0.1, 3.0, 0.0, 1.2, 2.5, 1.7, 2])
    >>> generate_bin(M, 3)
    [0, 3, 0, 1, 2, 1, 2]

    """
    col = utils.check_col(col)
    minimum = float(min(col))
    maximum = float(max(col))
    distance = float(maximum - minimum)
    # Note that the maximum value lands in a bin by itself, so this
    # produces num_bins + 1 distinct bin labels, as in the example above
    return [int((x - minimum) / distance * num_bins) for x in col]