Exemple #1
0
 def __init__(self, addr):
     """Connect to ``addr`` and prepare a cursor and a debugger.

     ``addr`` is stored on the instance and passed unchanged to
     ``sql.connect``; a cursor is then created from that connection.
     """
     self.addr = addr
     self.conn = sql.connect(addr)
     self.crsr = self.conn.cursor()
     self.debug = Debugger()
     # Both messages are emitted only after connect() and cursor() returned.
     self.debug.prn(self, 'SQLConnection object created.')
     self.debug.prn(self, 'Connection established.')
Exemple #2
0
 def __init__(self):
   """Set up axes and labels, then start with an empty sketch list.

   The output filename defaults to ``g.files['plot']``.
   """
   # Created before the helper calls below so that they can use it
   # (presumably they log through self.debug -- confirm against the class).
   self.debug = Debugger()
   self._set_axes()
   self._set_labels()
   self.sketches = []
   self.filename = g.files['plot']
   self.debug.prn(self, 'Plotter object created.')
Exemple #3
0
 def __init__(self, f, training_x, training_y, index):
     """Store the function ``f``, the training data and ``index``."""
     self.f = f
     self.debug = Debugger()
     self.training_x = training_x
     self.training_y = training_y
     self.index = index
     # NOTE(review): logs through the global g.debug rather than the
     # self.debug created above -- confirm this is intended.
     g.debug.prn(self, 'Model created.')
Exemple #4
0
def init_globals():
    """Create the shared application objects and attach them to ``g``."""
    g.randomizer = Randomizer()
    g.debug = Debugger()
    g.console = Console()
    g.analyzer = Analyzer()
    # The modeller and the GUI receive objects created on earlier lines,
    # so the order of these assignments matters.
    g.modeller = Modeller(g.analyzer)
    # NOTE(review): `plotter` is not defined in this function; it has to
    # exist at module level already or this raises NameError -- confirm.
    g.gui = GUI(plotter, g.analyzer, g.modeller)
    g.output_file_formatter = OutputFileFormatter()
Exemple #5
0
 def __init__(self, analyzer):
     """Keep a reference to ``analyzer`` and start with no data or models."""
     self.debug = Debugger()
     self.randomizer = Randomizer()
     self.analyzer = analyzer
     self.data = None
     # One list per kind of model; all start empty.
     self.linear_models = []
     self.logistic_models = []
     self.ridge_models = []
     # NOTE(review): logs through the global g.debug rather than the
     # self.debug created above -- confirm this is intended.
     g.debug.prn(self, 'Modeller object created.')
Exemple #6
0
 def __init__(self, f, training_x, training_y, index):
     """Store the function ``f``, the training data and ``index``."""
     self.f = f
     self.debug = Debugger()
     self.training_x = training_x
     self.training_y = training_y
     self.index = index
Exemple #7
0
 def __init__(self, analyzer):
     """Keep a reference to ``analyzer`` and start with no data or models."""
     self.debug = Debugger()
     self.randomizer = Randomizer()
     self.analyzer = analyzer
     self.data = None
     self.linear_models = []
Exemple #8
0
 def __init__(self):
   """Create a debugger and empty ``x`` and ``y`` lists."""
   self.debug = Debugger()
   self.x = []
   self.y = []
Exemple #9
0
class Sketch(object):
  """Base container for the x and y values of one plot element.

  Holds two lists, ``x`` and ``y``, and reports every mutation through
  ``self.debug.prn``.  ``plot()`` only logs an error here; concrete
  sketches are expected to provide their own.
  """

  def __init__(self):
    self.debug = Debugger()
    self.x = []
    self.y = []

  def class_name(self):
    """Return the name of this class as a string."""
    return 'Sketch'

  def add_x(self, x):
    """Append one x-value or a list of x-values.

    Args:
      x: a ``list`` of values, or a single real number (``int``,
        ``float``, or a numpy integer/floating scalar).

    Any other argument is rejected: an error is logged, the offending
    type is printed, and nothing is stored.
    """
    if isinstance(x, list):
      self.x.extend(x)
      self.debug.prn(self, 'Added x-vals.')
    elif (isinstance(x, (int, float, np.integer, np.floating))
          and not isinstance(x, bool)):
      # Plain floats and numpy integers used to be rejected because the
      # check compared exact types against int and np.float64 only.
      # bool is still excluded, as it was before.
      self.x.append(x)
      self.debug.prn(self, 'Added x-val.')
    else:
      self.debug.prn(self, 'Incorrect type passed to add_x()', 1)
      print(type(x))

  def get_x(self):
    """Return the list of stored x-values (the list itself, not a copy)."""
    return self.x

  def add_y(self, y):
    """Append every value of the iterable ``y`` to the stored y-values."""
    self.y.extend(y)
    self.debug.prn(self, 'Added y-vals.')

  def get_y(self):
    """Return the list of stored y-values (the list itself, not a copy)."""
    return self.y

  def add(self, c):
    """Append coordinates from ``c``, an iterable of ``(x, y)`` pairs."""
    for x_val, y_val in c:
      self.x.append(x_val)
      self.y.append(y_val)
    self.debug.prn(self, 'Added coords.')

  def plot(self):
    """Log an error: the base Sketch has nothing to draw."""
    self.debug.prn(self, 'Cannot call plot() on abstract Sketch.', 1)
Exemple #10
0
class Plotter(object):
  """Thin wrapper around ``plt`` that draws a collection of sketches.

  Axes and labels are set up on construction from the colours and label
  texts stored on ``g``; sketches are collected with ``load()`` and drawn
  when ``show()`` or ``save()`` is called.
  """

  def __init__(self):
    # The debugger has to exist before the two helpers run: both log.
    self.debug = Debugger()
    self._set_axes()
    self._set_labels()
    self.sketches = []
    self.filename = g.files['plot']
    self.debug.prn(self, 'Plotter object created.')

  def class_name(self):
    """Return the name of this class as a string."""
    return 'Plotter'

  def _set_axes(self):
    """Draw the horizontal and vertical zero lines."""
    plt.axhline(0, color=g.x_clr)
    plt.axvline(0, color=g.y_clr)
    self.debug.prn(self, 'Axes drawn.', 3)

  def set_x_axis_label(self, x_label):
    """Set the x-axis label."""
    plt.xlabel(x_label)

  def set_y_axis_label(self, y_label):
    """Set the y-axis label."""
    plt.ylabel(y_label)

  def set_axis_labels(self, x_label, y_label):
    """Set both axis labels, x first."""
    plt.xlabel(x_label)
    plt.ylabel(y_label)

  def _set_labels(self):
    """Apply the default axis labels stored on ``g``."""
    plt.xlabel(g.x_lbl)
    plt.ylabel(g.y_lbl)
    self.debug.prn(self, 'Labels set.', 3)

  def set_output_filename(self, filename):
    """Remember the path that ``save()`` writes to."""
    self.filename = filename
    self.debug.prn(self, 'Filename set.')

  def set_title(self, title):
    """Set the plot title."""
    plt.title(title)
    self.debug.prn(self, 'Title set.')

  def get_sketches(self):
    """Return the list of loaded sketches (the list itself, not a copy)."""
    return self.sketches

  def load(self, sketches):
    """Queue a single Sketch, or every element of a list, for drawing.

    Anything that is neither a ``list`` nor a ``Sketch`` instance is
    rejected with a logged error and leaves the queue unchanged.
    """
    if type(sketches) is list:
      self.sketches.extend(sketches)
      self.debug.prn(self, 'Sketches loaded')
      return
    if isinstance(sketches, Sketch):
      self.sketches.append(sketches)
      self.debug.prn(self, 'Sketch loaded.')
      return
    self.debug.prn(self, 'load() takes either a Sketch or a list', 1)

  def _plot(self):
    """Ask every loaded sketch to draw itself, in load order."""
    for loaded in self.sketches:
      loaded.plot()

  def show(self):
    """Draw all sketches and display the figure."""
    self._plot()
    plt.show()

  def save(self):
    """Draw all sketches and write the figure to ``self.filename``."""
    self._plot()
    plt.savefig(self.filename)

  def close(self):
    """Close the current figure."""
    plt.close()
    self.debug.prn(self, 'Plot closed.')
Exemple #11
0
class SQLConnection(object):
    """Small logging wrapper around one ``sql`` connection and its cursor.

    Statements are executed on a single cursor ("queued") and become
    permanent when ``commit()`` is called.  Every operation is reported
    through ``self.debug.prn`` after it has succeeded.
    """

    def __init__(self, addr):
        """Connect to ``addr`` via ``sql.connect`` and create a cursor."""
        self.addr = addr
        self.conn = sql.connect(addr)
        self.crsr = self.conn.cursor()
        self.debug = Debugger()
        self.debug.prn(self, 'SQLConnection object created.')
        self.debug.prn(self, 'Connection established.')

    def class_name(self):
        """Return the name of this class as a string."""
        return 'SQLConnection'

    def get_addr(self):
        """Return the stored address."""
        return self.addr

    def set_addr(self, addr):
        """Replace the stored address.

        Only the attribute changes; the open connection is not touched.
        """
        self.addr = addr

    def get_conn(self):
        """Return the underlying connection object."""
        return self.conn

    def get_crsr(self):
        """Return the underlying cursor object."""
        return self.crsr

    def queue(self, sql_code):
        """Execute one SQL statement on the cursor."""
        self.crsr.execute(sql_code)
        self.debug.prn(self, 'Queued command.')

    def queue_script(self, script):
        """Execute a multi-statement SQL script on the cursor."""
        self.crsr.executescript(script)
        self.debug.prn(self, 'Queued script.')

    def queue_for_all(self, sql_code, data):
        """Execute ``sql_code`` once for every parameter set in ``data``."""
        self.crsr.executemany(sql_code, data)
        self.debug.prn(self, 'Queued command for all.')

    def commit(self):
        """Commit the pending changes on the connection."""
        self.conn.commit()
        self.debug.prn(self, 'Committed queue.')

    def close(self):
        """Close the connection."""
        self.conn.close()
        self.debug.prn(self, 'Connection terminated.')

    def fetch(self, select_code=None):
        """Return all remaining rows of the cursor's current result.

        Args:
            select_code: optional SQL statement to execute first; when
                omitted, the rows of the previously queued statement
                are returned.

        Returns:
            Whatever ``cursor.fetchall()`` returns.
        """
        if select_code is not None:
            self.queue(select_code)
        # Fetch before logging so that a failed fetch is not reported
        # as 'Data fetched.'.
        rows = self.crsr.fetchall()
        self.debug.prn(self, 'Data fetched.')
        return rows
Exemple #12
0
 def __init__(self):
     """Create the debugger this object logs through."""
     self.debug = Debugger()
Exemple #13
0
class Analyzer(object):
    """Model-quality metrics and diagnostic plots.

    The classification metrics are all derived from
    ``get_confusion_matrix``; the regression helpers work on plain
    sequences of numbers or on a model's training data.
    """

    # Slopes sampled by ssr_curve() when the caller does not supply any.
    _DEFAULT_SLOPES = (0.1, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0, 1.5, 2.0, 2.5,
                       3.0, 4.0, 5.0, 7.5, 10.0)

    def __init__(self):
        self.debug = Debugger()

    def class_name(self):
        """Return the name of this class as a string."""
        return "Analyzer"

    def get_confusion_matrix(self, model, threshold):
        """Count the four prediction outcomes of ``model`` at ``threshold``.

        A value counts as positive when it is strictly greater than
        ``threshold``; the same rule is applied to the expected outputs
        and to the predictions ``f(x)``.

        Args:
            model: object providing ``get_dataset()`` and ``get_f()``;
                the dataset provides ``get_output_col()`` and
                ``get_input_cols()``.
            threshold: cut-off separating positives from negatives.

        Returns:
            ``[[tp, fp], [fn, tn]]``; all zeros for an empty dataset.
        """
        ds = model.get_dataset()
        y_true = ds.get_output_col()
        f = model.get_f()
        y_pred = [f(x) for x in ds.get_input_cols()]

        tp = tn = fp = fn = 0
        for y_p, y_t in zip(y_pred, y_true):
            predicted_positive = y_p > threshold
            if y_t > threshold:
                if predicted_positive:
                    tp += 1
                else:
                    fn += 1
            elif predicted_positive:
                fp += 1
            else:
                tn += 1
        # Returned only after every pair has been counted.
        return [[tp, fp], [fn, tn]]

    def get_tp(self, model, threshold):
        """Return the number of true positives."""
        return self.get_confusion_matrix(model, threshold)[0][0]

    def get_fp(self, model, threshold):
        """Return the number of false positives."""
        return self.get_confusion_matrix(model, threshold)[0][1]

    def get_fn(self, model, threshold):
        """Return the number of false negatives."""
        return self.get_confusion_matrix(model, threshold)[1][0]

    def get_tn(self, model, threshold):
        """Return the number of true negatives."""
        return self.get_confusion_matrix(model, threshold)[1][1]

    def get_specificity(self, model, threshold):  # Harry
        """Return ``tn / (tn + fp)``.

        See https://en.wikipedia.org/wiki/Sensitivity_and_specificity

        Raises:
            ZeroDivisionError: when there are no actual negatives.
        """
        (_, fp), (_, tn) = self.get_confusion_matrix(model, threshold)
        return tn / (tn + fp)

    def get_sensitivity(self, model, threshold):  # Harry
        """Return ``tp / (tp + fn)``.

        Raises:
            ZeroDivisionError: when there are no actual positives.
        """
        (tp, _), (fn, _) = self.get_confusion_matrix(model, threshold)
        return tp / (tp + fn)

    def get_precision(self, model, threshold):  # Harry
        """Return ``tp / (tp + fp)``.

        Raises:
            ZeroDivisionError: when nothing was predicted positive.
        """
        (tp, fp), _ = self.get_confusion_matrix(model, threshold)
        return tp / (tp + fp)

    def get_recall(self, model, threshold):  # Harry
        """Return the recall, which is the same quantity as sensitivity."""
        return self.get_sensitivity(model, threshold)

    def get_accuracy(self, model, threshold):  # Harry
        """Return the share of correct predictions, ``(tp + tn) / total``.

        Raises:
            ZeroDivisionError: when the dataset is empty.
        """
        (tp, fp), (fn, tn) = self.get_confusion_matrix(model, threshold)
        return (tp + tn) / (tp + tn + fp + fn)

    def get_fallout(self, model, threshold):  # Harry
        """Return ``fp / (fp + tn)``.

        Raises:
            ZeroDivisionError: when there are no actual negatives.
        """
        (_, fp), (_, tn) = self.get_confusion_matrix(model, threshold)
        return fp / (fp + tn)

    def get_bias(self):  # Harry
        """Not implemented yet; returns None."""
        pass

    def get_mean(self):  # Harry
        """Not implemented yet; returns None."""
        pass

    def get_auc(self):  # Harry
        """Not implemented yet; returns None."""
        pass

    def get_p_by_f_dist(self):  # Harry
        """Not implemented yet; returns None."""
        pass

    def get_variance(self, model):
        """Return the population variance of the model's training x-values."""
        x_vals = model.get_training_x()
        return self.get_variance_by_parts(model.get_f(), x_vals)

    def get_variance_by_parts(self, f, x_vals):
        """Return the population variance of ``x_vals``.

        ``f`` is accepted for interface compatibility but is not used.
        """
        x_av = np.mean(x_vals)
        return sum((x - x_av)**2 for x in x_vals) / len(x_vals)

    def get_r_sq(self, model):
        """Return R squared of the model on its own training data.

        Computed as ``1 - ss_res / ss_tot`` where ``ss_tot`` is taken
        around the mean of the training y-values.  The result is not
        defined when all training y-values are equal (``ss_tot`` is 0).
        """
        xs = model.get_training_x()
        ys = model.get_training_y()
        y_av = np.mean(ys)
        f = model.get_f()
        ss_res = 0
        ss_tot = 0
        for x, y in zip(xs, ys):
            ss_res += (y - f(x))**2
            ss_tot += (y - y_av)**2
        # The message used to read 'Variance generated.', which did not
        # describe what this method computes.
        g.debug.prn(self, 'R squared calculated.')
        return 1 - (ss_res / ss_tot)

    def plot_roc(self):
        """Unfinished: intended to draw the ROC curve and save it.

        NOTE(review): as written this cannot run to completion.  Judging
        by the Plotter signatures, set_axis_labels() needs two labels and
        set_output_filename() needs a filename, so both calls are
        expected to raise TypeError.
        """
        plotter = Plotter()
        plotter.set_title('Receiver Operating Characteristic')
        plotter.set_axis_labels('')
        plotter.set_output_filename()  # TODO: Fill filename
        plotter.close()
        # TODO: Save the image as "roc.png"

    def get_ss_res(self, coords, f):
        """Return the sum of squared residuals of ``f`` over ``coords``.

        Args:
            coords: iterable of ``(x, y)`` pairs.
            f: callable predicting y from x.
        """
        return sum((coord[1] - f(coord[0]))**2 for coord in coords)

    def ssr_curve(self, x, y, slopes=None):
        """Plot the sum of squared residuals for a range of slopes.

        For every slope the intercept is chosen so that the line passes
        through the point of means, and the resulting sum of squared
        residuals over ``(x, y)`` is recorded.  The scatter plot is saved
        to ``g.files['ls-ssr']`` and then rescaled in place.

        Args:
            x: sequence of x-values.
            y: sequence of y-values.
            slopes: slopes to evaluate; a built-in selection between 0.1
                and 10.0 is used when omitted.
        """
        # A fresh list per call: nothing is shared between calls, and
        # the value handed to add_x() is always a list.
        slopes = list(self._DEFAULT_SLOPES if slopes is None else slopes)
        x_av = np.mean(x)
        y_av = np.mean(y)

        ssrs = []
        for slope in slopes:
            yint = y_av - slope * x_av
            ssrs.append(
                self.get_ss_res(
                    zip(x, y), lambda val, m=slope, b=yint: m * val + b))

        image_manager = ImageManager()
        plotter = Plotter()
        plotter.set_title('Sum of Squared Residuals')
        plotter.set_axis_labels('Slope Selected', 'Sum of Squared Residual')
        plotter.set_output_filename(g.files['ls-ssr'])
        ssr_plot = ScatterSketch()
        ssr_plot.add_x(slopes)
        ssr_plot.add_y(ssrs)
        plotter.load(ssr_plot)
        plotter.save()
        plotter.close()
        g.debug.prn(self, 'Drawn Sum of Squared Residuals Plot')
        image_manager.scale(g.files['ls-ssr'], g.files['ls-ssr'], 250)

    def least_squares_slope_yint_eqn(self, x, y):
        """Return ``(slope, yint)`` of the least-squares line through the data.

        Raises:
            ZeroDivisionError: when all x-values are equal.
        """
        n = len(x)
        sum_x = sum(x)
        sum_y = sum(y)
        sum_xy = sum(a * b for a, b in zip(x, y))
        sum_x_sq = sum(a**2 for a in x)

        slope = (n * sum_xy - sum_x * sum_y) / (n * sum_x_sq - (sum_x**2))
        yint = np.mean(y) - slope * np.mean(x)
        return slope, yint

    def f_dist(self, model_type, trials):
        """Plot a histogram of F scores obtained from random data.

        For each trial a random data set is generated, a least-squares
        line is fitted, and

            F = ((ss_mean - ss_fit) / (p_fit - p_mean))
                / (ss_fit / (n - p_fit))

        is added to the histogram.  The figure is saved to
        ``g.files['least-squares-f']`` and rescaled in place.

        Args:
            model_type: only ``LinearModel`` is supported; any other
                value logs an error and stops generating trials.
            trials: number of random data sets to generate.
        """
        plotter = Plotter()
        image_manager = ImageManager()

        plotter.set_title('F Distribution')
        # NOTE(review): the first label goes on the x-axis; for a
        # histogram of F scores these two look swapped -- confirm.
        plotter.set_axis_labels('Frequency', 'F Score')
        plotter.set_output_filename(g.files['least-squares-f'])

        histogram = HistogramSketch()
        for _ in range(trials):
            x_vals = g.randomizer.random_list(g.points_to_gen, g.lower_x_bound,
                                              g.upper_x_bound)
            y_vals = g.randomizer.random_list(g.points_to_gen, g.lower_y_bound,
                                              g.upper_y_bound)

            if model_type == LinearModel:
                slope, yint = self.least_squares_slope_yint_eqn(x_vals, y_vals)
                func = lambda x, m=slope, b=yint: m * x + b
            else:
                g.debug.prn(self, 'Incompatible model type.', 1)
                break

            ss_fit = self.get_ss_res(zip(x_vals, y_vals), func)
            # The baseline model predicts the mean of y for every x; it
            # used to predict the mean of x, which made ss_mean wrong.
            y_mean = np.mean(y_vals)
            ss_mean = self.get_ss_res(zip(x_vals, y_vals),
                                      lambda x, m=y_mean: m)
            p_fit = 2  # TODO: Update for Dataframe
            p_mean = 1  # ""
            n = len(x_vals)

            if ss_fit == 0 or (n - p_fit) == 0 or (p_fit - p_mean) == 0:
                self.debug.prn(self, 'F distribution cannot divide by zero.',
                               1)
                continue
            numerator = (ss_mean - ss_fit) / (p_fit - p_mean)
            denominator = ss_fit / (n - p_fit)

            histogram.add_x(numerator / denominator)
            histogram.set_bins()

        plotter.load(histogram)
        plotter.save()
        plotter.close()
        image_manager.scale(g.files['least-squares-f'],
                            g.files['least-squares-f'], 250)
        self.debug.prn(self, 'F distribution created.')
class Analyzer(object):
  """Regression diagnostics; the classification metrics are still stubs."""

  def __init__(self):
    self.debug = Debugger()

  def class_name(self):
    """Return the name of this class as a string."""
    return "Analyzer"

  def get_confusion_matrix(self): # Harry
    """Not implemented yet; returns None."""
    pass

  def get_specificity(self): # Harry
    """Not implemented yet; returns None."""
    pass

  def get_sensitivity(self): # Harry
    """Not implemented yet; returns None."""
    pass

  def get_precision(self): # Harry
    """Not implemented yet; returns None."""
    pass

  def get_recall(self): # Harry
    """Not implemented yet; returns None."""
    pass

  def get_accuracy(self): # Harry
    """Not implemented yet; returns None."""
    pass

  def get_fallout(self): # Harry
    """Not implemented yet; returns None."""
    pass

  def get_bias(self): # Harry
    """Not implemented yet; returns None."""
    pass

  def get_mean(self): # Harry
    """Not implemented yet; returns None."""
    pass

  def get_auc(self): # Harry
    """Not implemented yet; returns None."""
    pass

  def get_p_by_f_dist(self): # Harry
    """Not implemented yet; returns None."""
    pass

  def get_variance(self, coords, f):
    """Return the mean squared residual of ``f`` over ``coords``.

    Args:
      coords: iterable of ``(x, y)`` pairs; one-shot iterables such as
        ``zip(...)`` are accepted.
      f: callable predicting y from x.

    Raises:
      ZeroDivisionError: when ``coords`` is empty.
    """
    # Materialise once: a zip object can be neither indexed nor walked
    # twice, and the divisor must be the number of points rather than
    # the length of the first pair.
    points = list(coords)
    return self.get_ss_res(points, f) / len(points)

  def get_r_sq(self, model):
    """Return R squared of the model on its own training data.

    Computed as ``(var_mean - var_fit) / var_mean`` where ``var_mean``
    is the variance around the mean of the training y-values.
    """
    x = model.get_training_x()
    y = model.get_training_y()
    f = model.get_f()
    # The baseline predicts the mean of y for every x; it used to
    # predict the mean of the x-values.
    y_mean = np.average(y)
    var_mean = self.get_variance(zip(x, y), lambda _x: y_mean)
    var_fit = self.get_variance(zip(x, y), f)
    g.debug.prn(self, 'R squared calculated.')
    return (var_mean - var_fit) / var_mean

  def plot_roc(self):
    """Unfinished: intended to draw the ROC curve and save it.

    NOTE(review): judging by the Plotter signature, set_axis_labels()
    needs two labels, so this call is expected to raise TypeError.
    """
    plotter = Plotter()
    plotter.set_title('Receiver Operating Characteristic')
    plotter.set_axis_labels('')
    plotter.close()
    # TODO: Save the image as "roc.png"

  def get_ss_res(self, coords, f):
    """Return the sum of squared residuals of ``f`` over ``coords``.

    Args:
      coords: iterable of ``(x, y)`` pairs.
      f: callable predicting y from x.
    """
    return sum((coord[1] - f(coord[0])) ** 2 for coord in coords)

  def ssr_curve(self, plotter_func, slopes):
    """Not implemented yet; returns None."""
    # TODO: Sum of squared residuals plot
    # ssr.png
    pass

  def least_squares_slope_yint_eqn(self, x, y):
    """Return ``(slope, yint)`` of the least-squares line through the data.

    Raises:
      ZeroDivisionError: when all x-values are equal.
    """
    n = len(x)
    sum_x = sum(x)
    sum_y = sum(y)
    sum_xy = sum(a * b for a, b in zip(x, y))
    sum_x_sq = sum(a ** 2 for a in x)

    slope = (n * sum_xy - sum_x * sum_y) / (n * sum_x_sq - (sum_x ** 2))
    yint = np.mean(y) - slope * np.mean(x)
    return slope, yint

  def f_dist(self, model_type, trials):
    """Plot a histogram of F scores obtained from random data.

    For each trial a random data set is generated, a least-squares line
    is fitted, and

        F = ((ss_mean - ss_fit) / (p_fit - p_mean))
            / (ss_fit / (n - p_fit))

    is added to the histogram.  The figure is saved to 'imgs/f.png' and
    rescaled in place.

    Args:
      model_type: only ``LinearModel`` is supported; any other value
        logs an error and stops generating trials.
      trials: number of random data sets to generate.
    """
    plotter = Plotter()
    image_manager = ImageManager()

    plotter.set_title('F Distribution')
    # NOTE(review): the first label goes on the x-axis; for a histogram
    # of F scores these two look swapped -- confirm.
    plotter.set_axis_labels('Frequency', 'F Score')
    plotter.set_output_filename('imgs/f.png')

    histogram = HistogramSketch()
    for _ in range(trials):
      x_vals = g.randomizer.random_list(g.points_to_gen, g.lower_x_bound, g.upper_x_bound)
      y_vals = g.randomizer.random_list(g.points_to_gen, g.lower_y_bound, g.upper_y_bound)

      if model_type == LinearModel:
        slope, yint = self.least_squares_slope_yint_eqn(x_vals, y_vals)
        func = lambda x, m=slope, b=yint: m * x + b
      else:
        g.debug.prn(self, 'Incompatible model type.', 1)
        break

      ss_fit = self.get_ss_res(zip(x_vals, y_vals), func)
      # The baseline model predicts the mean of y for every x; it used
      # to predict the mean of x, which made ss_mean wrong.
      y_mean = np.mean(y_vals)
      ss_mean = self.get_ss_res(zip(x_vals, y_vals), lambda x, m=y_mean: m)
      p_fit = 2 # TODO: Update for Dataframe
      p_mean = 1 # ""
      n = len(x_vals)

      if ss_fit == 0 or (n - p_fit) == 0 or (p_fit - p_mean) == 0:
        self.debug.prn(self, 'F distribution cannot divide by zero.', 1)
        continue
      numerator = (ss_mean - ss_fit) / (p_fit - p_mean)
      denominator = ss_fit / (n - p_fit)

      histogram.add_x(numerator / denominator)
      histogram.set_bins()

    plotter.load(histogram)
    plotter.save()
    plotter.close()
    image_manager.scale('imgs/f.png', 'imgs/f.png', 250)
    self.debug.prn(self, 'F distribution created.')
 def __init__(self, num_of_inputs):
   """Create a debugger and store ``num_of_inputs`` as ``self.n``."""
   self.debug = Debugger()
   self.n = num_of_inputs