コード例 #1
0
ファイル: glyph_db.py プロジェクト: zhencang/helit
def gen_bias(lg, hg):
    """Helper function - creates and returns a bias object for use when creating Glyphs.

    Basically weights each line with the amount of ink on it, so when a
    writer uses every other line it strongly biases towards letters being
    assigned to the lines they wrote on.

    Args:
        lg: Input handed to ``LineGraph.from_many`` to build the line graph
            that is measured.
        hg: Invertible matrix; its inverse is applied to the line graph to
            move it into line space before measuring.

    Returns:
        A ``defaultdict(float)`` keyed by integer line number. Values are
        divided by the largest value, so the heaviest line has weight 1.0.
        The dictionary is empty when the line graph has no edges, and is
        left unnormalised when every line has zero weight.
    """
    bias = defaultdict(float)

    # Transform the line graph to line space...
    ls_lg = LineGraph()
    ls_lg.from_many(lg)

    ihg = la.inv(hg)
    ls_lg.transform(ihg, True)

    # Add weight from all of the line segments...
    for ei in range(ls_lg.edge_count):
        edge = ls_lg.get_edge(ei)

        vf = ls_lg.get_vertex(edge[0])
        vt = ls_lg.get_vertex(edge[1])

        dx = vt[0] - vf[0]
        dy = vt[1] - vf[1]

        # Segment length scaled by the sum of vertex field 5 at both ends
        # (presumably the stroke radius - confirm against LineGraph).
        mass = (vf[5] + vt[5]) * numpy.sqrt(dx * dx + dy * dy)

        # The segment votes for the line containing its midpoint.
        line = int(numpy.floor(0.5 * (vt[1] + vf[1])))

        bias[line] += mass

    # Normalise and return...
    if not bias:
        # No edges at all: max() of an empty sequence would raise.
        return bias

    maximum = max(bias.values())

    # Skip the division when there is no ink, which would otherwise give
    # a divide-by-zero / NaN weights.
    if maximum > 0.0:
        for key in list(bias):
            bias[key] /= maximum

    return bias
コード例 #2
0
ファイル: glyph_db.py プロジェクト: eosbamsi/helit
def gen_bias(lg, hg):
  """Helper function - creates and returns a bias object for use when creating Glyphs.

  Basically weights each line with the amount of ink on it, so when a
  writer uses every other line it strongly biases towards letters being
  assigned to the lines they wrote on.

  Args:
    lg: Input handed to ``LineGraph.from_many`` to build the line graph
      that is measured.
    hg: Invertible matrix; its inverse is applied to the line graph to
      move it into line space before measuring.

  Returns:
    A ``defaultdict(float)`` keyed by integer line number. Values are
    divided by the largest value, so the heaviest line has weight 1.0.
    The dictionary is empty when the line graph has no edges, and is
    left unnormalised when every line has zero weight.
  """
  bias = defaultdict(float)

  # Transform the line graph to line space...
  ls_lg = LineGraph()
  ls_lg.from_many(lg)

  ihg = la.inv(hg)
  ls_lg.transform(ihg, True)

  # Add weight from all of the line segments...
  for ei in range(ls_lg.edge_count):
    edge = ls_lg.get_edge(ei)

    vf = ls_lg.get_vertex(edge[0])
    vt = ls_lg.get_vertex(edge[1])

    dx = vt[0] - vf[0]
    dy = vt[1] - vf[1]

    # Segment length scaled by the sum of vertex field 5 at both ends
    # (presumably the stroke radius - confirm against LineGraph).
    mass = (vf[5] + vt[5]) * numpy.sqrt(dx*dx + dy*dy)

    # The segment votes for the line containing its midpoint.
    line = int(numpy.floor(0.5 * (vt[1] + vf[1])))

    bias[line] += mass

  # Normalise and return...
  if not bias:
    # No edges at all: max() of an empty sequence would raise.
    return bias

  maximum = max(bias.values())

  # Skip the division when there is no ink, which would otherwise give
  # a divide-by-zero / NaN weights.
  if maximum > 0.0:
    for key in list(bias):
      bias[key] /= maximum

  return bias
コード例 #3
0
ファイル: glyph_db.py プロジェクト: zhencang/helit
class Glyph:
    """Represents a glyph, that has been transformed into a suitable coordinate system; includes connectivity information."""

    def __init__(self, lg, seg, hg, extra=0.4, bias=None):
        """Extract a glyph from a segmented LineGraph and normalise its position.

        Given a segmented LineGraph and segment number this extracts it,
        transforms it into the standard coordinate system and stores the
        homography used to get there. Also records its position on its
        assigned line and line number so it can be ordered suitably. Does
        not store connectivity information - that is done later.

        Args:
            lg: The segmented LineGraph, or None to create an uninitialised
                shell (used by clone(), which fills in the attributes).
            seg: Segment number to extract from lg.
            hg: Homography that transforms from line space, where there is
                a line for each y=integer, to the space of the original
                pixels.
            extra: Used for inferring the line position; extra falloff to
                have either side of a line voting for it - a smoothing
                term basically.
            bias: Optional dictionary indexed by line number that gives a
                weight to assign to being assigned to that line - used to
                utilise the fact that data collection asks the writer to
                use every-other line, which helps avoid misassigned
                dropped j's for instance.
        """
        if lg is None:
            return

        # Extract the line graph...
        self.lg = LineGraph()
        self.adjacent = self.lg.from_segment(lg, seg)
        self.seg = seg

        # Transform it to line space...
        ihg = la.inv(hg)
        self.lg.transform(ihg, True)

        # Check if which line it is on is tagged - exists as an override for annoying glyphs...
        line = None
        for tag in self.lg.get_tags():
            if tag[0] == 'line':
                # We have a tag of line - its position specifies the line the glyph is on...
                point = self.lg.get_point(tag[1], tag[2])
                line = int(numpy.floor(point[1]))
                break

        # Record which line it is on and its position along the line...
        # (Works by assuming that the line is the one below the space where most of the mass of the glyph is. Takes its range to be that within the space, so crazy tails are cut off.)
        min_x, max_x, min_y, max_y = self.lg.get_bounds()
        self.source = (0.5 * (min_x + max_x), 0.5 * (min_y + max_y))

        # Default the horizontal extent to the full bounds. This must happen
        # even when a 'line' tag supplied the line, otherwise the offset
        # adjustment further down fails on a missing attribute.
        self.left_x = min_x
        self.right_x = max_x

        if line is None:
            best_mass = 0.0
            line = 0

            start = int(numpy.trunc(min_y))
            for pl in range(start, int(numpy.ceil(max_y))):
                mass = 0.0
                low_y = float(pl) - extra
                high_y = float(pl + 1) + extra

                left_x = None
                right_x = None

                for es in self.lg.within(min_x, max_x, low_y, high_y):
                    for ei in range(*es.indices(self.lg.edge_count)):
                        edge = self.lg.get_edge(ei)
                        vf = self.lg.get_vertex(edge[0])
                        vt = self.lg.get_vertex(edge[1])

                        # Only edges with both ends strictly inside the band vote.
                        if low_y < vf[1] < high_y and low_y < vt[1] < high_y:
                            dx = vt[0] - vf[0]
                            dy = vt[1] - vf[1]
                            mass += (vf[5] + vt[5]) * numpy.sqrt(dx * dx + dy * dy)

                            if left_x is None:
                                left_x = min(vf[0], vt[0])
                            else:
                                left_x = min(vf[0], vt[0], left_x)

                            if right_x is None:
                                right_x = max(vf[0], vt[0])
                            else:
                                right_x = max(vf[0], vt[0], right_x)

                mass *= 1.0 / (1.0 + pl - start)  # Bias to choosing higher, for tails.

                if bias is not None:
                    mass *= bias[pl]

                if mass > best_mass:
                    best_mass = mass
                    self.left_x = left_x
                    self.right_x = right_x
                    line = pl

        # Transform it so it is positioned to be sitting on line 1 of y, store the total homography that we have applied...
        self.offset_x = -min_x
        self.offset_y = -line

        hg = numpy.eye(3, dtype=numpy.float32)
        hg[0, 2] = self.offset_x
        hg[1, 2] = self.offset_y

        self.left_x += self.offset_x
        self.right_x += self.offset_x

        self.lg.transform(hg)

        self.transform = numpy.dot(hg, ihg)

        # Set as empty its before and after glyphs - None if there is no adjacency, or a tuple if there is: (glyph, list of connecting (link glyph, shared vertex in this, shared vertex in glyph, vertex in link glyph on this side, vertex in link glyph on glyph side), empty if none.)...
        self.left = None
        self.right = None

        # Extract the character this glyph represents: the first tag that
        # has exactly one character once underscores are ignored...
        tags = self.lg.get_tags()
        codes = [
            t[0] for t in tags if sum(1 for c in t[0] if c != '_') == 1
        ]
        self.key = codes[0] if len(codes) != 0 else None

        self.code = -id(self)

        # Cache stuff...
        self.mass = None
        self.center = None
        self.feat = None
        self.v_offset = None

    def clone(self):
        """Returns a clone of this Glyph.

        The line graph, adjacency data, transform and neighbour links are
        shared with the original; the cached mass, center and feature
        arrays are copied.
        """
        ret = Glyph(None, None, None)

        ret.lg = self.lg
        ret.adjacent = self.adjacent
        ret.seg = self.seg

        ret.source = self.source

        ret.left_x = self.left_x
        ret.right_x = self.right_x

        ret.offset_x = self.offset_x
        ret.offset_y = self.offset_y
        ret.transform = self.transform

        ret.left = self.left
        ret.right = self.right
        ret.key = self.key

        ret.code = self.code

        # Identity tests are required here: the caches hold numpy arrays,
        # and comparing an array with None using == is element-wise.
        ret.mass = None if self.mass is None else self.mass.copy()
        ret.center = None if self.center is None else self.center.copy()
        ret.feat = None if self.feat is None else [a.copy() for a in self.feat]
        ret.v_offset = self.v_offset

        return ret

    def get_linegraph(self):
        """Returns the LineGraph of this glyph, in its transformed coordinate system."""
        return self.lg

    def orig_left_x(self):
        """Returns left_x with the x offset applied during construction removed."""
        return self.left_x - self.offset_x

    def orig_right_x(self):
        """Returns right_x with the x offset applied during construction removed."""
        return self.right_x - self.offset_x

    def get_mass(self):
        """Returns a vector of [average density, average radius] - used for matching adjacent glyphs. Cached after the first call."""
        if self.mass is None:
            self.mass = numpy.zeros(2, dtype=numpy.float32)
            weight = 0.0
            for i in range(self.lg.vertex_count):
                info = self.lg.get_vertex(i)

                # Incremental (running) mean over all vertices.
                weight += 1.0
                self.mass += (numpy.array([info[6], info[5]]) - self.mass) / weight

        return self.mass

    def get_center(self):
        """Returns the 'center' of the glyph - it is density weighted in an attempt to make it robust to crazy tails. Cached after the first call."""
        if self.center is None:
            self.center = numpy.zeros(2, dtype=numpy.float32)
            weight = 0.0

            for i in range(self.lg.vertex_count):
                info = self.lg.get_vertex(i)
                # Radius squared * density - proportional to quantity of ink, assuming (correctly as rest of system currently works) even sampling.
                w = info[5] * info[5] * info[6]
                if w > 1e-6:
                    # Incremental weighted mean of the vertex positions.
                    weight += w
                    mult = w / weight
                    self.center[0] += (info[0] - self.center[0]) * mult
                    self.center[1] += (info[1] - self.center[1]) * mult

        return self.center

    def get_voffset(self):
        """Calculates and returns the vertical offset to apply to the glyph that corrects for any systematic bias in its flow calculation.

        Combines the estimates obtained from the left and right neighbours
        (when present), weighting each by the inverse of its variance. The
        result is 0.0 when the glyph has no neighbours, and is cached.
        """
        if self.v_offset is None:
            self.v_offset = 0.0
            weight = 0.0

            truth = self.get_center()[1]

            # Calculate the estimated offsets from the left side and update the estimate, correctly factoring in the variance of the offset...
            if self.left is not None:
                diff, sd = costs.glyph_pair_offset(self.left[0], self, 0.2, True)
                estimate = self.left[0].get_center()[1] + diff
                offset = truth - estimate

                est_weight = 1.0 / (sd**2.0)
                weight += est_weight
                self.v_offset += (offset - self.v_offset) * est_weight / weight

            # Again from the right side...
            if self.right is not None:
                diff, sd = costs.glyph_pair_offset(self, self.right[0], 0.2, True)
                estimate = self.right[0].get_center()[1] - diff
                offset = truth - estimate

                est_weight = 1.0 / (sd**2.0)
                weight += est_weight
                self.v_offset += (offset - self.v_offset) * est_weight / weight

        return self.v_offset

    def most_left(self):
        """Returns the (x, y) coordinate of the furthest left vertex in the glyph.

        On ties the lowest-indexed vertex wins. Requires at least one vertex.
        """
        info = self.lg.get_vertex(0)
        best_x = info[0]
        best_y = info[1]

        for i in range(1, self.lg.vertex_count):
            info = self.lg.get_vertex(i)
            if info[0] < best_x:
                best_x = info[0]
                best_y = info[1]

        return (best_x, best_y)

    def most_right(self):
        """Returns the (x, y) coordinate of the furthest right vertex in the glyph.

        On ties the lowest-indexed vertex wins. Requires at least one vertex.
        """
        info = self.lg.get_vertex(0)
        best_x = info[0]
        best_y = info[1]

        for i in range(1, self.lg.vertex_count):
            info = self.lg.get_vertex(i)
            if info[0] > best_x:
                best_x = info[0]
                best_y = info[1]

        return (best_x, best_y)

    def get_feat(self):
        """Calculates and returns a feature for the glyph, or, more accurately, two features, representing (left, right), so some tricks can be done to make their use side dependent (for learning a function for matching to adjacent glyphs).

        Each feature vector holds a weighted average of vertex field 5
        (radius) at index 0, a weighted average of vertex field 6 (density)
        at index 1, and the normalised weighted sum of the per-vertex
        LineGraph features from index 2 onwards. Cached after the first call.
        """
        if self.feat is None:
            # First build a cumulative distribution over the x axis range of the glyph...
            min_x, max_x, min_y, max_y = self.lg.get_bounds()
            culm = numpy.ones(32, dtype=numpy.float32)
            culm *= 1e-2

            # Pad the range so the normalised position stays strictly below
            # 1, keeping the upper interpolation bin inside the array.
            min_x -= 1e-3
            max_x += 1e-3

            for i in range(self.lg.vertex_count):
                info = self.lg.get_vertex(i)
                w = info[5] * info[5] * info[6]
                t = (info[0] - min_x) / (max_x - min_x)

                # Linearly split the vertex weight between the two nearest bins.
                t *= (culm.shape[0] - 1)
                low = int(t)
                high = low + 1
                t -= low
                culm[low] += (1.0 - t) * w
                culm[high] += t * w

            culm /= culm.sum()
            culm = numpy.cumsum(culm)

            # Now extract all the per sample features...
            feat_param = {
                'dir_travel': 0.1,
                'travel_max': 1.0,
                'travel_bins': 6,
                'travel_ratio': 0.8,
                'pos_bins': 3,
                'pos_ratio': 0.9,
                'radius_bins': 1,
                'density_bins': 3
            }
            fv = self.lg.features(**feat_param)

            # Combine them into the two halves, by weighting by the cumulative; include density and radius as well...
            left = numpy.zeros(fv.shape[1] + 2, dtype=numpy.float32)
            right = numpy.zeros(fv.shape[1] + 2, dtype=numpy.float32)

            left_total = 0.0
            right_total = 0.0

            for i in range(self.lg.vertex_count):
                info = self.lg.get_vertex(i)
                w = info[5] * info[5] * info[6]
                t = (info[0] - min_x) / (max_x - min_x)

                t *= (culm.shape[0] - 1)
                low = int(t)
                high = low + 1
                t -= low

                # Interpolated cumulative value: grows towards the right
                # edge, so it is the weight for the right-hand feature.
                right_w = (1.0 - t) * culm[low] + t * culm[high]
                left_w = 1.0 - right_w

                left[0] += left_w * info[5]
                right[0] += right_w * info[5]

                left[1] += left_w * info[6]
                right[1] += right_w * info[6]

                left[2:] += w * left_w * fv[i, :]
                right[2:] += w * right_w * fv[i, :]

                left_total += left_w
                right_total += right_w

            left[:2] /= left_total
            right[:2] /= right_total
            left[2:] /= max(left[2:].sum(), 1e-6)
            right[2:] /= max(right[2:].sum(), 1e-6)

            self.feat = (left, right)

        return self.feat

    def __str__(self):
        """Returns a short description: the code, the key and the keys of the left and right neighbours."""
        l = self.left[0].key if self.left is not None else 'None'
        r = self.right[0].key if self.right is not None else 'None'
        return 'Glyph %i: key = %s (%s|%s)' % (self.code, self.key, l, r)
コード例 #4
0
ファイル: glyph_db.py プロジェクト: eosbamsi/helit
class Glyph:
  """Represents a glyph, that has been transformed into a suitable coordinate system; includes connectivity information."""

  def __init__(self, lg, seg, hg, extra = 0.4, bias = None):
    """Extract a glyph from a segmented LineGraph and normalise its position.

    Given a segmented LineGraph and segment number this extracts it,
    transforms it into the standard coordinate system and stores the
    homography used to get there. Also records its position on its
    assigned line and line number so it can be ordered suitably. Does
    not store connectivity information - that is done later.

    Args:
      lg: The segmented LineGraph, or None to create an uninitialised
        shell (used by clone(), which fills in the attributes).
      seg: Segment number to extract from lg.
      hg: Homography that transforms from line space, where there is
        a line for each y=integer, to the space of the original pixels.
      extra: Used for inferring the line position; extra falloff to
        have either side of a line voting for it - a smoothing term
        basically.
      bias: Optional dictionary indexed by line number that gives a
        weight to assign to being assigned to that line - used to
        utilise the fact that data collection asks the writer to use
        every-other line, which helps avoid misassigned dropped j's
        for instance.
    """
    if lg is None: return

    # Extract the line graph...
    self.lg = LineGraph()
    self.adjacent = self.lg.from_segment(lg, seg)
    self.seg = seg

    # Transform it to line space...
    ihg = la.inv(hg)
    self.lg.transform(ihg, True)

    # Check if which line it is on is tagged - exists as an override for annoying glyphs...
    line = None
    for tag in self.lg.get_tags():
      if tag[0]=='line':
        # We have a tag of line - its position specifies the line the glyph is on...
        point = self.lg.get_point(tag[1], tag[2])
        line = int(numpy.floor(point[1]))
        break

    # Record which line it is on and its position along the line...
    # (Works by assuming that the line is the one below the space where most of the mass of the glyph is. Takes its range to be that within the space, so crazy tails are cut off.)
    min_x, max_x, min_y, max_y = self.lg.get_bounds()
    self.source = (0.5 * (min_x + max_x), 0.5 * (min_y + max_y))

    # Default the horizontal extent to the full bounds. This must happen
    # even when a 'line' tag supplied the line, otherwise the offset
    # adjustment further down fails on a missing attribute.
    self.left_x = min_x
    self.right_x = max_x

    if line is None:
      best_mass = 0.0
      line = 0

      start = int(numpy.trunc(min_y))
      for pl in range(start, int(numpy.ceil(max_y))):
        mass = 0.0
        low_y = float(pl) - extra
        high_y = float(pl+1) + extra

        left_x = None
        right_x = None

        for es in self.lg.within(min_x, max_x, low_y, high_y):
          for ei in range(*es.indices(self.lg.edge_count)):
            edge = self.lg.get_edge(ei)
            vf = self.lg.get_vertex(edge[0])
            vt = self.lg.get_vertex(edge[1])

            # Only edges with both ends strictly inside the band vote.
            if low_y < vf[1] < high_y and low_y < vt[1] < high_y:
              dx = vt[0] - vf[0]
              dy = vt[1] - vf[1]
              mass += (vf[5] + vt[5]) * numpy.sqrt(dx*dx + dy*dy)

              if left_x is None: left_x = min(vf[0], vt[0])
              else: left_x = min(vf[0], vt[0], left_x)

              if right_x is None: right_x = max(vf[0], vt[0])
              else: right_x = max(vf[0], vt[0], right_x)

        mass *= 1.0/(1.0+pl - start) # Bias to choosing higher, for tails.

        if bias is not None:
          mass *= bias[pl]

        if mass>best_mass:
          best_mass = mass
          self.left_x = left_x
          self.right_x = right_x
          line = pl

    # Transform it so it is positioned to be sitting on line 1 of y, store the total homography that we have applied...
    self.offset_x = -min_x
    self.offset_y = -line

    hg = numpy.eye(3, dtype=numpy.float32)
    hg[0,2] = self.offset_x
    hg[1,2] = self.offset_y

    self.left_x += self.offset_x
    self.right_x += self.offset_x

    self.lg.transform(hg)

    self.transform = numpy.dot(hg, ihg)

    # Set as empty its before and after glyphs - None if there is no adjacency, or a tuple if there is: (glyph, list of connecting (link glyph, shared vertex in this, shared vertex in glyph, vertex in link glyph on this side, vertex in link glyph on glyph side), empty if none.)...
    self.left = None
    self.right = None

    # Extract the character this glyph represents: the first tag that
    # has exactly one character once underscores are ignored...
    tags = self.lg.get_tags()
    codes = [t[0] for t in tags if sum(1 for c in t[0] if c!='_')==1]
    self.key = codes[0] if len(codes)!=0 else None

    self.code = -id(self)

    # Cache stuff...
    self.mass = None
    self.center = None
    self.feat = None
    self.v_offset = None


  def clone(self):
    """Returns a clone of this Glyph.

    The line graph, adjacency data, transform and neighbour links are
    shared with the original; the cached mass, center and feature
    arrays are copied.
    """
    ret = Glyph(None, None, None)

    ret.lg = self.lg
    ret.adjacent = self.adjacent
    ret.seg = self.seg

    ret.source = self.source

    ret.left_x = self.left_x
    ret.right_x = self.right_x

    ret.offset_x = self.offset_x
    ret.offset_y = self.offset_y
    ret.transform = self.transform

    ret.left = self.left
    ret.right = self.right
    ret.key = self.key

    ret.code = self.code

    # Identity tests are required here: the caches hold numpy arrays,
    # and comparing an array with None using == is element-wise.
    ret.mass = None if self.mass is None else self.mass.copy()
    ret.center = None if self.center is None else self.center.copy()
    ret.feat = None if self.feat is None else [a.copy() for a in self.feat]
    ret.v_offset = self.v_offset

    return ret


  def get_linegraph(self):
    """Returns the LineGraph of this glyph, in its transformed coordinate system."""
    return self.lg


  def orig_left_x(self):
    """Returns left_x with the x offset applied during construction removed."""
    return self.left_x - self.offset_x

  def orig_right_x(self):
    """Returns right_x with the x offset applied during construction removed."""
    return self.right_x - self.offset_x


  def get_mass(self):
    """Returns a vector of [average density, average radius] - used for matching adjacent glyphs. Cached after the first call."""
    if self.mass is None:
      self.mass = numpy.zeros(2, dtype=numpy.float32)
      weight = 0.0
      for i in range(self.lg.vertex_count):
        info = self.lg.get_vertex(i)

        # Incremental (running) mean over all vertices.
        weight += 1.0
        self.mass += (numpy.array([info[6], info[5]]) - self.mass) / weight

    return self.mass


  def get_center(self):
    """Returns the 'center' of the glyph - it is density weighted in an attempt to make it robust to crazy tails. Cached after the first call."""
    if self.center is None:
      self.center = numpy.zeros(2, dtype=numpy.float32)
      weight = 0.0

      for i in range(self.lg.vertex_count):
        info = self.lg.get_vertex(i)
        w = info[5] * info[5] * info[6] # Radius squared * density - proportional to quantity of ink, assuming (correctly as rest of system currently works) even sampling.
        if w>1e-6:
          # Incremental weighted mean of the vertex positions.
          weight += w
          mult = w / weight
          self.center[0] += (info[0] - self.center[0]) * mult
          self.center[1] += (info[1] - self.center[1]) * mult

    return self.center


  def get_voffset(self):
    """Calculates and returns the vertical offset to apply to the glyph that corrects for any systematic bias in its flow calculation.

    Combines the estimates obtained from the left and right neighbours
    (when present), weighting each by the inverse of its variance. The
    result is 0.0 when the glyph has no neighbours, and is cached.
    """
    if self.v_offset is None:
      self.v_offset = 0.0
      weight = 0.0

      truth = self.get_center()[1]

      # Calculate the estimated offsets from the left side and update the estimate, correctly factoring in the variance of the offset...
      if self.left is not None:
        diff, sd = costs.glyph_pair_offset(self.left[0], self, 0.2, True)
        estimate = self.left[0].get_center()[1] + diff
        offset = truth - estimate

        est_weight = 1.0 / (sd**2.0)
        weight += est_weight
        self.v_offset += (offset - self.v_offset) * est_weight / weight

      # Again from the right side...
      if self.right is not None:
        diff, sd = costs.glyph_pair_offset(self, self.right[0], 0.2, True)
        estimate = self.right[0].get_center()[1] - diff
        offset = truth - estimate

        est_weight = 1.0 / (sd**2.0)
        weight += est_weight
        self.v_offset += (offset - self.v_offset) * est_weight / weight

    return self.v_offset


  def most_left(self):
    """Returns the (x, y) coordinate of the furthest left vertex in the glyph.

    On ties the lowest-indexed vertex wins. Requires at least one vertex.
    """
    info = self.lg.get_vertex(0)
    best_x = info[0]
    best_y = info[1]

    for i in range(1, self.lg.vertex_count):
      info = self.lg.get_vertex(i)
      if info[0]<best_x:
        best_x = info[0]
        best_y = info[1]

    return (best_x, best_y)

  def most_right(self):
    """Returns the (x, y) coordinate of the furthest right vertex in the glyph.

    On ties the lowest-indexed vertex wins. Requires at least one vertex.
    """
    info = self.lg.get_vertex(0)
    best_x = info[0]
    best_y = info[1]

    for i in range(1, self.lg.vertex_count):
      info = self.lg.get_vertex(i)
      if info[0]>best_x:
        best_x = info[0]
        best_y = info[1]

    return (best_x, best_y)


  def get_feat(self):
    """Calculates and returns a feature for the glyph, or, more accurately, two features, representing (left, right), so some tricks can be done to make their use side dependent (for learning a function for matching to adjacent glyphs).

    Each feature vector holds a weighted average of vertex field 5
    (radius) at index 0, a weighted average of vertex field 6 (density)
    at index 1, and the normalised weighted sum of the per-vertex
    LineGraph features from index 2 onwards. Cached after the first call.
    """
    if self.feat is None:
      # First build a cumulative distribution over the x axis range of the glyph...
      min_x, max_x, min_y, max_y = self.lg.get_bounds()
      culm = numpy.ones(32, dtype=numpy.float32)
      culm *= 1e-2

      # Pad the range so the normalised position stays strictly below 1,
      # keeping the upper interpolation bin inside the array.
      min_x -= 1e-3
      max_x += 1e-3

      for i in range(self.lg.vertex_count):
        info = self.lg.get_vertex(i)
        w = info[5] * info[5] * info[6]
        t = (info[0] - min_x) / (max_x - min_x)

        # Linearly split the vertex weight between the two nearest bins.
        t *= (culm.shape[0]-1)
        low = int(t)
        high = low + 1
        t -= low
        culm[low] += (1.0 - t) * w
        culm[high] += t * w

      culm /= culm.sum()
      culm = numpy.cumsum(culm)

      # Now extract all the per sample features...
      feat_param = {'dir_travel':0.1, 'travel_max':1.0, 'travel_bins':6, 'travel_ratio':0.8, 'pos_bins':3, 'pos_ratio':0.9, 'radius_bins':1, 'density_bins':3}
      fv = self.lg.features(**feat_param)

      # Combine them into the two halves, by weighting by the cumulative; include density and radius as well...
      left = numpy.zeros(fv.shape[1]+2, dtype=numpy.float32)
      right = numpy.zeros(fv.shape[1]+2, dtype=numpy.float32)

      left_total = 0.0
      right_total = 0.0

      for i in range(self.lg.vertex_count):
        info = self.lg.get_vertex(i)
        w = info[5] * info[5] * info[6]
        t = (info[0] - min_x) / (max_x - min_x)

        t *= (culm.shape[0]-1)
        low = int(t)
        high = low + 1
        t -= low

        # Interpolated cumulative value: grows towards the right edge,
        # so it is the weight for the right-hand feature.
        right_w = (1.0-t) * culm[low] + t * culm[high]
        left_w = 1.0 - right_w

        left[0] += left_w * info[5]
        right[0] += right_w * info[5]

        left[1] += left_w * info[6]
        right[1] += right_w * info[6]

        left[2:] += w * left_w * fv[i,:]
        right[2:] += w * right_w * fv[i,:]

        left_total += left_w
        right_total += right_w

      left[:2] /= left_total
      right[:2] /= right_total
      left[2:] /= max(left[2:].sum(), 1e-6)
      right[2:] /= max(right[2:].sum(), 1e-6)

      self.feat = (left, right)

    return self.feat


  def __str__(self):
    """Returns a short description: the code, the key and the keys of the left and right neighbours."""
    l = self.left[0].key if self.left is not None else 'None'
    r = self.right[0].key if self.right is not None else 'None'
    return 'Glyph %i: key = %s (%s|%s)' % (self.code, self.key, l, r)