def plot_histograms(args):
  classifiers = {}
  data, labels = [], []
  for _file in args.infiles:
    label, content = _format_data(_file, args)
    labels.append(label)
    X, _ = zip(*content)
    data.append(X)
  
  plt.hist(data, args.histogram_bins, histtype='bar', stacked=False, label=labels)

  plt.legend(loc=args.legend_loc)

  set_plot_limits(plt, args)
  if args.savefig is not None:
    filename = args.savefig
    modifiers = []
    if args.x_limits:
      modifiers.append('X=%d,%d' % tuple(args.x_limits))
    if args.y_limits:
      modifiers.append('Y=%d,%d' % tuple(args.y_limits))
    name, ext = filename.rsplit('.')
    new_filename = '{old_name}-{modifiers}.{ext}'.format(old_name=name, modifiers='-'.join(modifiers), ext=ext)
    print 'saving figure to - ', new_filename
    plt.savefig(new_filename, dpi=320)
    plt.clf()
  else:
    plt.show()
Example #2
0
def plot_histograms(args):
    classifiers = {}
    data, labels = [], []
    for _file in args.infiles:
        label, content = _format_data(_file, args)
        labels.append(label)
        X, _ = zip(*content)
        data.append(X)

    plt.hist(data,
             args.histogram_bins,
             histtype='bar',
             stacked=False,
             label=labels)

    plt.legend(loc=args.legend_loc)

    set_plot_limits(plt, args)
    if args.savefig is not None:
        filename = args.savefig
        modifiers = []
        if args.x_limits:
            modifiers.append('X=%d,%d' % tuple(args.x_limits))
        if args.y_limits:
            modifiers.append('Y=%d,%d' % tuple(args.y_limits))
        name, ext = filename.rsplit('.')
        new_filename = '{old_name}-{modifiers}.{ext}'.format(
            old_name=name, modifiers='-'.join(modifiers), ext=ext)
        print 'saving figure to - ', new_filename
        plt.savefig(new_filename, dpi=320)
        plt.clf()
    else:
        plt.show()
Example #3
0
def _plot_hist2d(data, args):
  """Draw a log-normalised 2D histogram of two columns of ``data``.

  Rows with a null in either column are dropped first.  The process exits
  with status 1 when fewer than 1000 rows remain.

  Args:
    data: table indexed by column name (used like a pandas DataFrame:
      ``notnull``, boolean-mask indexing, ``shape``).
    args: parsed command-line namespace.  Reads ``hist2d`` (x column name,
      then y column name) and ``histogram_bins``; it is also forwarded to
      ``set_plot_limits``.
  """
  x_col, y_col = args.hist2d[0], args.hist2d[1]
  # One combined mask instead of data[mask_x][mask_y]: the chained form makes
  # pandas reindex the second mask against the already-filtered frame.
  data = data[data[x_col].notnull() & data[y_col].notnull()]
  if data.shape[0] < 1000:
    # A string argument makes sys.exit print the reason to stderr while the
    # exit status stays 1.
    sys.exit('hist2d: need at least 1000 rows with values in both '
             '{x} and {y}, got {n}'.format(x=x_col, y=y_col, n=data.shape[0]))
  plt.hist2d(data[x_col],
             data[y_col],
             bins=args.histogram_bins,
             norm=LogNorm())
  plt.colorbar()
  # NOTE(review): set_plot_limits is called both before and after the axis
  # labels are set; the second call looks redundant -- confirm before
  # removing either one.
  set_plot_limits(plt, args)
  plt.xlabel(x_col)
  plt.ylabel(y_col)
  set_plot_limits(plt, args)
  plt.title("N = {}".format(data.shape[0]))
def _plot_hist2d(data, args):
    """Draw a log-normalised 2D histogram of two columns of ``data``.

    Rows with a null in either column are dropped first.  The process exits
    with status 1 when fewer than 1000 rows remain.

    Args:
        data: table indexed by column name (used like a pandas DataFrame:
            ``notnull``, boolean-mask indexing, ``shape``).
        args: parsed command-line namespace.  Reads ``hist2d`` (x column
            name, then y column name) and ``histogram_bins``; it is also
            forwarded to ``set_plot_limits``.
    """
    x_col, y_col = args.hist2d[0], args.hist2d[1]
    # One combined mask instead of data[mask_x][mask_y]: the chained form
    # makes pandas reindex the second mask against the filtered frame.
    data = data[data[x_col].notnull() & data[y_col].notnull()]
    if data.shape[0] < 1000:
        # A string argument makes sys.exit print the reason to stderr while
        # the exit status stays 1.
        sys.exit('hist2d: need at least 1000 rows with values in both '
                 '{x} and {y}, got {n}'.format(
                     x=x_col, y=y_col, n=data.shape[0]))
    plt.hist2d(data[x_col],
               data[y_col],
               bins=args.histogram_bins,
               norm=LogNorm())
    plt.colorbar()
    # NOTE(review): set_plot_limits is called both before and after the axis
    # labels are set; the second call looks redundant -- confirm before
    # removing either one.
    set_plot_limits(plt, args)
    plt.xlabel(x_col)
    plt.ylabel(y_col)
    set_plot_limits(plt, args)
    plt.title("N = {}".format(data.shape[0]))
Example #5
0
def _plot_hist2d(data, args):
    """Draw a log-normalised 2D histogram of two columns of ``data``.

    Rows with a null in either column are dropped first, and the process
    exits with status 1 when fewer than 1000 rows remain.  Infinite values
    in the two columns are then discarded as well and the rest is plotted as
    floats.  The title shows the number of rows actually drawn.

    Args:
        data: pandas DataFrame containing both columns named in
            ``args.hist2d``.
        args: parsed command-line namespace.  Reads ``hist2d`` (x column
            name, then y column name) and ``histogram_bins``; it is also
            forwarded to ``set_plot_limits``.
    """
    x_col, y_col = args.hist2d[0], args.hist2d[1]
    # One combined mask instead of data[mask_x][mask_y]: the chained form
    # makes pandas reindex the second mask against the filtered frame.
    data = data[data[x_col].notnull() & data[y_col].notnull()]
    if data.shape[0] < 1000:
        # A string argument makes sys.exit print the reason to stderr while
        # the exit status stays 1.
        sys.exit('hist2d: need at least 1000 rows with values in both '
                 '{x} and {y}, got {n}'.format(
                     x=x_col, y=y_col, n=data.shape[0]))
    # Turn +/-inf into NaN so dropna removes those rows too.
    df = data.replace([np.inf, -np.inf], np.nan).dropna(subset=args.hist2d)
    plt.hist2d(df[x_col].astype(float),
               df[y_col].astype(float),
               bins=args.histogram_bins,
               norm=LogNorm())
    plt.colorbar()
    # NOTE(review): set_plot_limits is called both before and after the axis
    # labels are set; the second call looks redundant -- confirm before
    # removing either one.
    set_plot_limits(plt, args)
    plt.xlabel(x_col)
    plt.ylabel(y_col)
    set_plot_limits(plt, args)
    # Count the rows that were plotted (df), not the pre-filter frame, so N
    # matches what is on screen when infinite values were dropped.
    plt.title("N = {}".format(df.shape[0]))
def _plot_hist2d(data, args):
  """Draw a log-normalised 2D histogram of two columns of ``data``.

  Rows with a null in either column are dropped first, and the process exits
  with status 1 when fewer than 1000 rows remain.  Infinite values in the two
  columns are then discarded as well and the rest is plotted as floats.  The
  title shows the number of rows actually drawn.

  Args:
    data: pandas DataFrame containing both columns named in ``args.hist2d``.
    args: parsed command-line namespace.  Reads ``hist2d`` (x column name,
      then y column name) and ``histogram_bins``; it is also forwarded to
      ``set_plot_limits``.
  """
  x_col, y_col = args.hist2d[0], args.hist2d[1]
  # One combined mask instead of data[mask_x][mask_y]: the chained form makes
  # pandas reindex the second mask against the already-filtered frame.
  data = data[data[x_col].notnull() & data[y_col].notnull()]
  if data.shape[0] < 1000:
    # A string argument makes sys.exit print the reason to stderr while the
    # exit status stays 1.
    sys.exit('hist2d: need at least 1000 rows with values in both '
             '{x} and {y}, got {n}'.format(x=x_col, y=y_col, n=data.shape[0]))
  # Turn +/-inf into NaN so dropna removes those rows too.
  df = data.replace([np.inf, -np.inf], np.nan).dropna(subset=args.hist2d)
  plt.hist2d(df[x_col].astype(float),
             df[y_col].astype(float),
             bins=args.histogram_bins,
             norm=LogNorm())
  plt.colorbar()
  # NOTE(review): set_plot_limits is called both before and after the axis
  # labels are set; the second call looks redundant -- confirm before
  # removing either one.
  set_plot_limits(plt, args)
  plt.xlabel(x_col)
  plt.ylabel(y_col)
  set_plot_limits(plt, args)
  # Count the rows that were plotted (df), not the pre-filter frame, so N
  # matches what is on screen when infinite values were dropped.
  plt.title("N = {}".format(df.shape[0]))
Example #7
0
def plot_distribution(args):
    if args.csv is not None:
        data = pd.read_csv(args.csv)
        print ' '.join(list(data.columns.values))
        if args.filter_num_rtus:
            print 'before filtering size =', data.shape[0]
            data = data[data['num_rtus'] == args.filter_num_rtus]
            print 'after filtering size =', data.shape[0]
        if args.filter_controller:
            print 'before filtering size =', data.shape[0]
            data = data[data['controller_id'] == args.filter_controller]
            print 'after filtering size =', data.shape[0]
        if 'controller_id' in data:
            print 'total controller_ids included =', len(
                set(data['controller_id']))
        if 'num_rtus' in data:
            print 'distinct num_rtus =', len(set(data['num_rtus'])), set(
                data['num_rtus'])
    else:
        cursor = args.db_connection.cursor()
        cursor.execute(
            "select relname from pg_class where relkind='r' and relname !~ '^(pg_|sql_)';"
        )  # noqa
        if args.query:
            with open(args.query, 'r') as infile:
                sql = ''.join(list(infile))
        else:
            sql = """
        SELECT {select} FROM {table};
      """.format(select='*', table=args.table)
        print sql
        cursor.execute(sql)
        colnames = [desc[0] for desc in cursor.description]
        data = pd.DataFrame(cursor.fetchall(), columns=colnames)

    # Set args.data, so we can pass only args to functions
    args.data = data

    data_size = data.shape[0]

    if args.scatter is not None:
        if args.labels:
            interesting_data = data[[
                args.scatter[0], args.scatter[1], args.labels
            ]]
            different_labels = set(data[args.labels])
            for label, color in zip(different_labels,
                                    matplotlib.colors.cnames.keys()):
                df = interesting_data.query('{column} == "{label}"'.format(
                    column=args.labels, label=label))
                plt.scatter(df[args.scatter[0]],
                            df[args.scatter[1]],
                            c=color,
                            label=label)
        else:
            plt.scatter(data[args.scatter[0]], data[args.scatter[1]], c=color)
        plt.xlabel(args.scatter[0])
        plt.ylabel(args.scatter[1])
    elif args.histogram is not None:
        if args.labels:
            interesting_data = data[[args.histogram, args.labels]]
            different_labels = set(data[args.labels])
            data_to_plot, colors_to_use, labels_to_show = [], [], []
            miscellaneous_labels = set()
            misc_frame, misc_color = pd.DataFrame(), None
            for label, color in zip(different_labels,
                                    matplotlib.colors.cnames.keys()):
                df = interesting_data.query('{column} == "{label}"'.format(
                    column=args.labels, label=label))
                if df.shape[0] < args.miscellaneous_cutoff * data_size:
                    miscellaneous_labels.add(label)
                    misc_frame = pd.concat([misc_frame, df[args.histogram]])
                    misc_color = color
                    continue
                labels_to_show.append('{label} ({count})'.format(
                    label=label, count=df.shape[0]))
                data_to_plot.append(df[args.histogram])
                colors_to_use.append(color)
            if misc_color is not None:
                labels_to_show.append('miscellaneous ({count})'.format(
                    count=misc_frame.shape[0]))
                data_to_plot.append(misc_frame)
                # colors_to_use.append(misc_color)
                colors_to_use.append('cyan')
            plt.hist(data_to_plot,
                     args.histogram_bins,
                     histtype='bar',
                     color=colors_to_use,
                     label=labels_to_show)
        else:
            df = data.replace([np.inf, -np.inf],
                              np.nan).dropna(subset=[args.histogram])
            plt.hist(df[args.histogram].astype(float),
                     bins=args.histogram_bins,
                     label=args.histogram)
            plt.yscale('log')

        plt.xlabel(args.histogram)
        if args.scale_down:
            plt.ylim(ymax=int(data_size * args.miscellaneous_cutoff))
    elif args.hist2d is not None:
        _plot_hist2d(data, args)
    elif args.scatter3d is not None:
        plot_scatter3d(data, args)

    plt.legend()
    if not args.scatter3d and not args.histogram:
        set_plot_limits(plt, args)
    if args.savefig is not None:
        plt.savefig(args.savefig, dpi=320)
        plt.clf()
    else:
        plt.show()
Example #8
0
def plot_estimator(args):
    estimates = json.load(args.estimates)

    dimensions = len(_format_datum(estimates['inliers'][0], args.columns)) - 1

    y_limits = None if dimensions != 2 else args.y_limits

    inliers = _extract_data(estimates, 'inliers', args.columns, args.x_limits,
                            y_limits)
    outliers = _extract_data(estimates, 'outliers', args.columns,
                             args.x_limits, y_limits)
    all_data = itertools.chain(inliers, outliers)

    print 'plotting %d dimensional plot' % dimensions

    if dimensions == 1:
        if args.restrict_to:
            data = _extract_data(estimates, args.restrict_to, args.columns,
                                 args.x_limits, None)
            X, _ = zip(*data)
            plt.hist(X, args.histogram_bins, label=args.restrict_to)
        else:
            inliers = list(inliers)
            outliers = list(outliers)
            X1, _ = zip(*inliers)
            X2, _ = zip(*outliers)
            n, bins, patches = plt.hist([X1, X2],
                                        args.histogram_bins,
                                        histtype='bar',
                                        stacked=args.stacked_hist,
                                        label=['inliers', 'outliers'],
                                        color=['blue', 'red'])
            bin_width = bins[1] - bins[0]
            print 'plotted a curve with bin width =', bin_width

        if args.plot_scores:
            # Plot the estimate
            combined_X, scores = zip(
                *sorted(itertools.chain(inliers, outliers)))
            scaling_factor = bin_width * (len(estimates['inliers']) +
                                          len(estimates['outliers']))
            sign = 1. if scores[0] > 0 else -1
            scaling_factor *= sign
            scaled_scores = [scaling_factor * y for y in scores]
            plt.plot(combined_X,
                     scaled_scores,
                     color='magenta',
                     label='est distribution',
                     lw=1.1)
    elif dimensions == 2:
        if args.restrict_to:
            data = _extract_data(estimates, args.restrict_to, args.columns,
                                 args.x_limits, args.y_limits)
        else:
            data = all_data
        X, Y, _ = zip(*data)
        plt.hist2d(X, Y, bins=args.histogram_bins, norm=LogNorm())
        plt.colorbar()

    plt.legend(loc=args.legend_loc)

    set_plot_limits(plt, args)
    if args.savefig is not None:
        if args.savefig == SAVEFIG_INFER_VALUE:
            name = os.path.basename(args.estimates.name).rsplit('.')[0]
            filename = 'target/plots/{}.png'.format(name)
        else:
            filename = args.savefig
        modifiers = []
        if args.restrict_to:
            modifiers.append(args.restrict_to)
        if args.estimates:
            modifiers.append('estimates')
        if args.x_limits:
            modifiers.append('X=%d,%d' % tuple(args.x_limits))
        if args.y_limits:
            modifiers.append('Y=%d,%d' % tuple(args.y_limits))
        name, ext = filename.rsplit('.')
        new_filename = '{old_name}-{modifiers}.{ext}'.format(
            old_name=name, modifiers='-'.join(modifiers), ext=ext)
        print 'saving figure to - ', new_filename
        plt.savefig(new_filename, dpi=320)
        plt.clf()
    else:
        plt.show()
def plot_distribution(args):
  if args.csv is not None:
    data = pd.read_csv(args.csv)
    print ' '.join(list(data.columns.values))
    if args.filter_num_rtus:
      print 'before filtering size =', data.shape[0]
      data = data[data['num_rtus'] == args.filter_num_rtus]
      print 'after filtering size =', data.shape[0]
    if args.filter_controller:
      print 'before filtering size =', data.shape[0]
      data = data[data['controller_id'] == args.filter_controller]
      print 'after filtering size =', data.shape[0]
    if 'controller_id' in data:
      print 'total controller_ids included =', len(set(data['controller_id']))
    if 'num_rtus' in data:
      print 'distinct num_rtus =', len(set(data['num_rtus'])), set(data['num_rtus'])
  else:
    cursor = args.db_connection.cursor()
    cursor.execute("select relname from pg_class where relkind='r' and relname !~ '^(pg_|sql_)';")  # noqa
    if args.query:
      with open(args.query, 'r') as infile:
        sql = ''.join(list(infile))
    else:
      sql = """
        SELECT {select} FROM {table};
      """.format(select='*', table=args.table)
    print sql
    cursor.execute(sql)
    colnames = [desc[0] for desc in cursor.description]
    data = pd.DataFrame(cursor.fetchall(), columns=colnames)

  # Set args.data, so we can pass only args to functions
  args.data = data

  data_size = data.shape[0]

  if args.scatter is not None:
    if args.labels:
      interesting_data = data[[args.scatter[0], args.scatter[1], args.labels]]
      different_labels = set(data[args.labels])
      for label, color in zip(different_labels,
                              matplotlib.colors.cnames.keys()):
        df = interesting_data.query('{column} == "{label}"'.format(
                                    column=args.labels, label=label))
        plt.scatter(df[args.scatter[0]], df[args.scatter[1]],
                    c=color, label=label)
    else:
      plt.scatter(data[args.scatter[0]], data[args.scatter[1]],
                  c=color)
    plt.xlabel(args.scatter[0])
    plt.ylabel(args.scatter[1])
  elif args.histogram is not None:
    if args.labels:
      interesting_data = data[[args.histogram, args.labels]]
      different_labels = set(data[args.labels])
      data_to_plot, colors_to_use, labels_to_show = [], [], []
      miscellaneous_labels = set()
      misc_frame, misc_color = pd.DataFrame(), None
      for label, color in zip(different_labels,
                              matplotlib.colors.cnames.keys()):
        df = interesting_data.query('{column} == "{label}"'.format(
                                    column=args.labels, label=label))
        if df.shape[0] < args.miscellaneous_cutoff * data_size:
          miscellaneous_labels.add(label)
          misc_frame = pd.concat([misc_frame, df[args.histogram]])
          misc_color = color
          continue
        labels_to_show.append('{label} ({count})'.format(label=label,
                                                         count=df.shape[0]))
        data_to_plot.append(df[args.histogram])
        colors_to_use.append(color)
      if misc_color is not None:
        labels_to_show.append('miscellaneous ({count})'.format(
                              count=misc_frame.shape[0]))
        data_to_plot.append(misc_frame)
        # colors_to_use.append(misc_color)
        colors_to_use.append('cyan')
      plt.hist(data_to_plot, args.histogram_bins, histtype='bar',
               color=colors_to_use, label=labels_to_show)
    else:
      df = data.replace([np.inf, -np.inf], np.nan).dropna(subset=[args.histogram])
      plt.hist(df[args.histogram].astype(float),
               bins=args.histogram_bins,
               label=args.histogram)
      plt.yscale('log')

    plt.xlabel(args.histogram)
    if args.scale_down:
      plt.ylim(ymax=int(data_size * args.miscellaneous_cutoff))
  elif args.hist2d is not None:
    _plot_hist2d(data, args)
  elif args.scatter3d is not None:
    plot_scatter3d(data, args)

  plt.legend()
  if not args.scatter3d and not args.histogram:
    set_plot_limits(plt, args)
  if args.savefig is not None:
    plt.savefig(args.savefig, dpi=320)
    plt.clf()
  else:
    plt.show()
Example #10
0
def plot_estimator(args):
  estimates = json.load(args.estimates)

  dimensions = len(_format_datum(estimates['inliers'][0], args.columns)) - 1

  y_limits = None if dimensions != 2 else args.y_limits

  inliers = _extract_data(estimates, 'inliers', args.columns,
                          args.x_limits, y_limits)
  outliers = _extract_data(estimates, 'outliers', args.columns,
                           args.x_limits, y_limits)
  all_data = itertools.chain(inliers, outliers)

  print 'plotting %d dimensional plot' % dimensions

  if dimensions == 1:
    if args.restrict_to:
      data = _extract_data(estimates, args.restrict_to, args.columns,
                           args.x_limits, None)
      X, _ = zip(*data)
      plt.hist(X, args.histogram_bins,
               label=args.restrict_to)
    else:
      inliers = list(inliers)
      outliers = list(outliers)
      X1, _ = zip(*inliers)
      X2, _ = zip(*outliers)
      n, bins, patches = plt.hist([X1, X2], args.histogram_bins,
                                  histtype='bar',
                                  stacked=args.stacked_hist,
                                  label=['inliers', 'outliers'],
                                  color=['blue', 'red'])
      bin_width = bins[1] - bins[0]
      print 'plotted a curve with bin width =', bin_width

    if args.plot_scores:
      # Plot the estimate
      combined_X, scores = zip(*sorted(itertools.chain(inliers, outliers)))
      scaling_factor = bin_width * (len(estimates['inliers']) +
                                    len(estimates['outliers']))
      sign = 1. if scores[0] > 0 else -1
      scaling_factor *= sign
      scaled_scores = [scaling_factor * y for y in scores]
      plt.plot(combined_X, scaled_scores,
               color='magenta', label='est distribution', lw=1.1)
  elif dimensions == 2:
    if args.restrict_to:
      data = _extract_data(estimates, args.restrict_to, args.columns,
                           args.x_limits, args.y_limits)
    else:
      data = all_data
    X, Y, _ = zip(*data)
    plt.hist2d(X, Y,
               bins=args.histogram_bins,
               norm=LogNorm())
    plt.colorbar()

  plt.legend(loc=args.legend_loc)

  set_plot_limits(plt, args)
  if args.savefig is not None:
    if args.savefig == SAVEFIG_INFER_VALUE:
      name = os.path.basename(args.estimates.name).rsplit('.')[0]
      filename = 'target/plots/{}.png'.format(name)
    else:
      filename = args.savefig
    modifiers = []
    if args.restrict_to:
      modifiers.append(args.restrict_to)
    if args.estimates:
      modifiers.append('estimates')
    if args.x_limits:
      modifiers.append('X=%d,%d' % tuple(args.x_limits))
    if args.y_limits:
      modifiers.append('Y=%d,%d' % tuple(args.y_limits))
    name, ext = filename.rsplit('.')
    new_filename = '{old_name}-{modifiers}.{ext}'.format(
        old_name=name, modifiers='-'.join(modifiers), ext=ext)
    print 'saving figure to - ', new_filename
    plt.savefig(new_filename, dpi=320)
    plt.clf()
  else:
    plt.show()
Example #11
0
def plot_score_contours(args):
    weights = []
    centers = []
    sigmas = []
    if args.centers and args.covariances and args.weights:
        # normalize weights to sum to 1.
        weights = json.load(args.weights)
        weights = [w / sum(weights) for w in weights]
        weights = [0.4 * w / max(weights) for w in weights]
        centers = load_cluster_parameters(args.centers)
        sigmas = load_cluster_parameters(args.covariances)
        fig = plt.figure(0)
        ax = fig.add_subplot(111)
        for i in range(len(centers)):
            w, h, angle = get_ellipse_from_covariance(sigmas[i])
            e = patches.Ellipse(centers[i], w, h, angle=angle)
            e.set_alpha(weights[i])
            ax.add_artist(e)
            print i, weights[i], centers[i], sigmas[i]
        set_ax_limits(ax, args)
        x, y = zip(*centers)
        plt.scatter(x, y, s=weights)

    X, Y, Z = load_json_dump(args.scored_grid)

    if args.score_cap:
        Z = [min(z, args.score_cap) for z in Z]
    if args.score_lower_limit:
        Z = [max(z, args.score_lower_limit) for z in Z]

    size = int(math.sqrt(len(Z)))
    X = np.reshape(X, (size, size))
    Y = np.reshape(Y, (size, size))
    Z = np.reshape(Z, (size, size))

    def format_args(i):
        kwargs = {}
        kwargs['mux'] = centers[i][0]
        kwargs['muy'] = centers[i][1]
        kwargs['sigmax'] = math.sqrt(sigmas[i][0][0])
        kwargs['sigmay'] = math.sqrt(sigmas[i][1][1])
        kwargs['sigmaxy'] = sigmas[i][0][1]
        return kwargs

    if len(weights):
        Zgaussians = weights[0] * mlab.bivariate_normal(X, Y, **format_args(0))
        for i in range(1, len(centers)):
            Zgaussians += weights[i] * mlab.bivariate_normal(
                X, Y, **format_args(i))

    if args.plot == 'components':
        CS = plt.contour(X, Y, Zgaussians, linewidth=10000, inline=1)
    elif args.plot == 'density':
        CS = plt.contour(X, Y, Z, linewidth=10000, inline=1)
    elif args.plot == 'difference':
        CS = plt.contour(X, Y, Z - Zgaussians, linewidth=10000, inline=1)

    if args.plot != 'noop':
        plt.clabel(CS, inline=1)

    set_plot_limits(plt, args)

    if args.csv and args.hist2d:
        args.data = pd.read_csv(args.csv)
        _plot_hist2d(args.data, args)

    if args.savefig:
        if args.savefig == SAVEFIG_INFER_VALUE:
            name = os.path.basename(args.scored_grid).rsplit('.')[0]
            filename = 'target/plots/{}.png'.format(name)
        else:
            filename = args.savefig
        print 'saving figure to - ', filename
        plt.savefig(filename, dpi=320)
    else:
        plt.show()
def plot_score_contours(args):
  weights = []
  centers = []
  sigmas = []
  if args.centers and args.covariances and args.weights:
    # normalize weights to sum to 1.
    weights = json.load(args.weights)
    weights = [w / sum(weights) for w in weights]
    weights = [0.4 * w / max(weights) for w in weights]
    centers = load_cluster_parameters(args.centers)
    sigmas = load_cluster_parameters(args.covariances)
    fig = plt.figure(0)
    ax = fig.add_subplot(111)
    for i in range(len(centers)):
      w, h, angle = get_ellipse_from_covariance(sigmas[i])
      e = patches.Ellipse(centers[i], w, h, angle=angle)
      e.set_alpha(weights[i])
      ax.add_artist(e)
      print i, weights[i], centers[i], sigmas[i]
    set_ax_limits(ax, args)
    x, y = zip(*centers)
    plt.scatter(x, y, s=weights)

  X, Y, Z = load_json_dump(args.scored_grid)

  if args.score_cap:
    Z = [min(z, args.score_cap) for z in Z]
  if args.score_lower_limit:
    Z = [max(z, args.score_lower_limit) for z in Z]

  size = int(math.sqrt(len(Z)))
  X = np.reshape(X, (size, size))
  Y = np.reshape(Y, (size, size))
  Z = np.reshape(Z, (size, size))

  def format_args(i):
    kwargs = {}
    kwargs['mux'] = centers[i][0]
    kwargs['muy'] = centers[i][1]
    kwargs['sigmax'] = math.sqrt(sigmas[i][0][0])
    kwargs['sigmay'] = math.sqrt(sigmas[i][1][1])
    kwargs['sigmaxy'] = sigmas[i][0][1]
    return kwargs

  if len(weights):
    Zgaussians = weights[0] * mlab.bivariate_normal(X, Y, **format_args(0))
    for i in range(1, len(centers)):
      Zgaussians += weights[i] * mlab.bivariate_normal(X, Y, **format_args(i))

  if args.plot == 'components':
    CS = plt.contour(X, Y, Zgaussians, linewidth=10000, inline=1)
  elif args.plot == 'density':
    CS = plt.contour(X, Y, Z, linewidth=10000, inline=1)
  elif args.plot == 'difference':
    CS = plt.contour(X, Y, Z - Zgaussians, linewidth=10000, inline=1)

  if args.plot != 'noop':
    plt.clabel(CS, inline=1)

  set_plot_limits(plt, args)

  if args.csv and args.hist2d:
    args.data = pd.read_csv(args.csv)
    _plot_hist2d(args.data, args)

  if args.savefig:
    if args.savefig == SAVEFIG_INFER_VALUE:
      name = os.path.basename(args.scored_grid).rsplit('.')[0]
      filename = 'target/plots/{}.png'.format(name)
    else:
      filename = args.savefig
    print 'saving figure to - ', filename
    plt.savefig(filename, dpi=320)
  else:
    plt.show()
Example #13
0
def plot_distribution(args):
  if args.csv is None:
    cursor = args.db_connection.cursor()
    cursor.execute("select relname from pg_class where relkind='r' and relname !~ '^(pg_|sql_)';")  # noqa
    print cursor.fetchall()
    sql = """
      SELECT {select} FROM {table};
    """.format(select='*', table=args.table)
    print sql
    colnames = [desc[0] for desc in cursor.description]
    data = pd.DataFrame(cursor.fetchall(), columns=colnames)
  else:
    data = pd.read_csv(args.csv)

  # Set args.data, so we can pass only args to functions
  args.data = data

  data_size = data.shape[0]

  if args.scatter is not None:
    if args.labels:
      interesting_data = data[[args.scatter[0], args.scatter[1], args.labels]]
      different_labels = set(data[args.labels])
      for label, color in zip(different_labels,
                              matplotlib.colors.cnames.keys()):
        df = interesting_data.query('{column} == "{label}"'.format(
                                    column=args.labels, label=label))
        plt.scatter(df[args.scatter[0]], df[args.scatter[1]],
                    c=color, label=label)
    else:
      plt.scatter(data[args.scatter[0]], data[args.scatter[1]],
                  c=color)
    plt.xlabel(args.scatter[0])
    plt.ylabel(args.scatter[1])
  elif args.histogram is not None:
    if args.labels:
      interesting_data = data[[args.histogram, args.labels]]
      different_labels = set(data[args.labels])
      data_to_plot, colors_to_use, labels_to_show = [], [], []
      miscellaneous_labels = set()
      misc_frame, misc_color = pd.DataFrame(), None
      for label, color in zip(different_labels,
                              matplotlib.colors.cnames.keys()):
        df = interesting_data.query('{column} == "{label}"'.format(
                                    column=args.labels, label=label))
        if df.shape[0] < args.miscellaneous_cutoff * data_size:
          miscellaneous_labels.add(label)
          misc_frame = pd.concat([misc_frame, df[args.histogram]])
          misc_color = color
          continue
        labels_to_show.append('{label} ({count})'.format(label=label,
                                                         count=df.shape[0]))
        data_to_plot.append(df[args.histogram])
        colors_to_use.append(color)
      if misc_color is not None:
        labels_to_show.append('miscellaneous ({count})'.format(
                              count=misc_frame.shape[0]))
        data_to_plot.append(misc_frame)
        # colors_to_use.append(misc_color)
        colors_to_use.append('cyan')
      plt.hist(data_to_plot, args.histogram_bins, histtype='bar',
               color=colors_to_use, label=labels_to_show)
    else:
      plt.hist(data[args.histogram], args.histogram_bins,
               label=args.histogram)
    plt.xlabel(args.histogram)
    if args.scale_down:
      plt.ylim(ymax=int(data_size * args.miscellaneous_cutoff))
  elif args.hist2d is not None:
    _plot_hist2d(data, args)

  plt.legend()
  set_plot_limits(plt, args)
  if args.savefig is not None:
    plt.savefig(args.savefig, dpi=320)
    plt.clf()
  else:
    plt.show()