Beispiel #1
0
    def _only_one_stem_per_notehead(self, cropobjects, edges):
        """Prune notehead-to-stem edges so each notehead keeps a single stem.

        For every notehead (class name in ``_CONST.NOTEHEAD_CLSNAMES``) that
        has edges leading to ``'stem'`` objects, only the edge to the stem
        with the smallest ``cropobject_distance`` survives. Edges that do not
        connect such a notehead to a stem are passed through unchanged.

        :param cropobjects: Objects exposing ``objid`` and ``clsname``; every
            objid that appears in ``edges`` must belong to one of them.
        :param edges: ``(from_objid, to_objid)`` pairs.
        :returns: A new list with the surviving edges, in their input order.
        """
        by_objid = {obj.objid: obj for obj in cropobjects}

        # Group the stems attached to each notehead and remember every stem
        # that is the target of at least one notehead edge.
        candidate_stems = collections.defaultdict(list)
        linked_stems = set()
        for src_id, dst_id in edges:
            src, dst = by_objid[src_id], by_objid[dst_id]
            if src.clsname not in _CONST.NOTEHEAD_CLSNAMES:
                continue
            if dst.clsname != 'stem':
                continue
            candidate_stems[src_id].append(dst_id)
            linked_stems.add(dst_id)

        # For each notehead, the winner is the stem at minimum distance.
        winner_of = {}
        for notehead_id, stem_ids in candidate_stems.items():
            notehead = by_objid[notehead_id]
            nearest = min((by_objid[stem_id] for stem_id in stem_ids),
                          key=lambda stem: cropobject_distance(notehead, stem))
            winner_of[notehead_id] = nearest.objid

        # Drop only edges from a notehead with stems to a linked stem that is
        # not that notehead's winner; everything else is kept.
        kept = []
        for src_id, dst_id in edges:
            contested = src_id in winner_of and dst_id in linked_stems
            if contested and winner_of[src_id] != dst_id:
                continue
            kept.append((src_id, dst_id))
        return kept
Beispiel #2
0
    def get_features_distance_relative_bbox_and_clsname(self, c_from, c_to):
        """Build the feature record for an ordered pair of CropObjects.

        Class names stay strings; they are *NOT* converted to integers. Any
        class name starting with ``'letter'`` or ``'numeral'`` is collapsed
        to exactly that prefix.

        The target is 1 when both objects have equal ``doc`` and
        ``c_to.objid`` is in ``c_from.outlinks``; otherwise it is 0.

        :param c_from: Source object of the candidate link.
        :param c_to: Destination object of the candidate link.
        :returns: A dict with keys ``'dist'`` (``cropobject_distance`` of the
            pair), ``'dt'``, ``'dl'``, ``'db'``, ``'dr'`` (``c_to`` minus
            ``c_from`` for top, left, bottom, right), ``'cls_from'``,
            ``'cls_to'`` and ``'target'``.
        """

        def _collapse(clsname):
            # Merge the fine-grained letter/numeral classes into one bucket.
            for prefix in ('letter', 'numeral'):
                if clsname.startswith(prefix):
                    return prefix
            return clsname

        is_linked = (c_from.doc == c_to.doc) and (
            c_to.objid in c_from.outlinks)
        target = 1 if is_linked else 0

        return {
            'dist': cropobject_distance(c_from, c_to),
            'dt': c_to.top - c_from.top,
            'dl': c_to.left - c_from.left,
            'db': c_to.bottom - c_from.bottom,
            'dr': c_to.right - c_from.right,
            'cls_from': _collapse(c_from.clsname),
            'cls_to': _collapse(c_to.clsname),
            'target': target,
        }
Beispiel #3
0
    def _every_full_notehead_has_a_stem(self, cropobjects, edges,
                                        closest_stem_threshold_distance=80):
        """Add an edge to a nearby free stem for each stemless full notehead.

        A ``'notehead-full'`` object is "stemless" when no edge in ``edges``
        leads from it to a ``'stem'`` object; a stem is "free" when no such
        edge leads to it. Each stemless notehead is paired with the free stem
        at minimum ``cropobject_distance``, and the pair is added as a new
        edge if that distance is below the threshold.

        The pool of free stems is not updated while assigning, so several
        noteheads may receive the same stem.

        :param cropobjects: Objects exposing ``objid`` and ``clsname``; every
            objid that appears in ``edges`` must belong to one of them.
        :param edges: List of ``(from_objid, to_objid)`` pairs.
        :param closest_stem_threshold_distance: New edges are only added when
            the notehead-to-stem distance is strictly below this value.
        :returns: A new list: ``edges`` followed by the added
            ``(notehead_objid, stem_objid)`` pairs.
        """
        _cdict = {c.objid: c for c in cropobjects}

        notehead_objids = {
            c.objid for c in cropobjects if c.clsname == 'notehead-full'}
        stem_objids = {c.objid for c in cropobjects if c.clsname == 'stem'}

        # Find which noteheads and stems already take part in a
        # notehead-full -> stem edge.
        noteheads_with_stem_objids = set()
        stems_with_notehead_objids = set()
        for f, t in edges:
            if (_cdict[f].clsname == 'notehead-full'
                    and _cdict[t].clsname == 'stem'):
                noteheads_with_stem_objids.add(f)
                stems_with_notehead_objids.add(t)

        stemless_notehead_objids = [
            n for n in notehead_objids if n not in noteheads_with_stem_objids]
        free_stem_objids = [
            s for s in stem_objids if s not in stems_with_notehead_objids]

        new_edges = []
        # min() raises ValueError on an empty sequence, so only search for
        # a partner when at least one free stem exists.
        if free_stem_objids:
            for n_objid in stemless_notehead_objids:
                notehead = _cdict[n_objid]
                s_objid = min(
                    free_stem_objids,
                    key=lambda s, n=notehead: cropobject_distance(_cdict[s], n))
                # Discard candidate edges that are too long.
                if (cropobject_distance(notehead, _cdict[s_objid])
                        < closest_stem_threshold_distance):
                    new_edges.append((n_objid, s_objid))

        return edges + new_edges
Beispiel #4
0
    def extract_all_pairs(self, cropobjects):
        """Collect ordered pairs of nearby objects and their features.

        Every ordered pair ``(u, v)`` with different ``objid`` values whose
        ``cropobject_distance`` is below ``self.MAXIMUM_DISTANCE_THRESHOLD``
        is kept, and ``self.extractor(u, v)`` is evaluated for it.

        :param cropobjects: Objects exposing ``objid``.
        :returns: ``(pairs, features)`` where ``pairs`` is a list of
            ``(u, v)`` tuples and ``features`` is a numpy array built from
            the extractor outputs, in the same order as ``pairs``.
        """
        pairs = [
            (src, dst)
            for src in cropobjects
            for dst in cropobjects
            if src.objid != dst.objid
            and cropobject_distance(src, dst) < self.MAXIMUM_DISTANCE_THRESHOLD
        ]
        features = numpy.array(
            [self.extractor(src, dst) for src, dst in pairs])
        return pairs, features
Beispiel #5
0
def symbol_distances(cropobjects):
    """For each pair of cropobjects, compute ``cropobject_distance``.

    The distance is computed once per unordered pair and stored in both
    directions, so ``cropobject_distance`` is treated as symmetric. Each
    object's distance to itself is included as well.

    :param cropobjects: Hashable objects accepted by ``cropobject_distance``.
    :returns: A dict of dicts keyed by the cropobjects themselves (not by
        their objids): ``distances[c][d]`` is the distance between ``c``
        and ``d``.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    _start_time = time.perf_counter()
    distances = {}
    for c in cropobjects:
        # Keep a row that earlier iterations already filled symmetrically;
        # recreating it would force every pair to be computed twice.
        row = distances.setdefault(c, {})
        for d in cropobjects:
            if d in row:
                continue
            delta = cropobject_distance(c, d)
            row[d] = delta
            distances.setdefault(d, {})[c] = delta
    print('Distances for {0} cropobjects took {1:.3f} seconds'
          ''.format(len(cropobjects),
                    time.perf_counter() - _start_time))
    return distances
Beispiel #6
0
    def get_closest_objects(self, cropobjects: List[CropObject],
                            threshold) -> Dict[CropObject, List[CropObject]]:
        """Find, for every cropobject, the objects that lie close to it.

        Two objects count as close when ``cropobject_distance`` for the pair
        is strictly below ``threshold``. Every ordered pair is examined,
        including an object paired with itself, and a match is recorded for
        both members of the pair.

        :param cropobjects: The objects to compare with one another.
        :param threshold: Exclusive upper bound on the distance.
        :returns: A dict mapping each cropobject to the list of its close
            objects, without duplicates, in order of first occurrence.
        """
        neighbours = {obj: [] for obj in cropobjects}

        for first in cropobjects:
            for second in cropobjects:
                if cropobject_distance(first, second) < threshold:
                    neighbours[first].append(second)
                    neighbours[second].append(first)

        # dict.fromkeys() drops repeated entries while keeping their order.
        return {
            obj: list(dict.fromkeys(found))
            for obj, found in neighbours.items()
        }