Example #1
0
    def _identify_entities(self):
        ''' Identify Entities in ER '''

        # STEP 1: find core relations
        self._find_core_relations()

        # STEP 2: find IDD relations
        self._find_IDD_relations()

        # STEP 3: find component relations
        self._find_comp_relations()

        # STEP 4: find ISA relations
        self._find_ISA_relations()

        # Identify Entities
        # each core relation would result in an Entity
        for cname, R in self._core_relations.items():
            if cname not in self._IDD_relations:
                # a regular entity
                self._add_entity(R, EntityType.regular)
            else:
                # Add IDD to Entities
                # Add corresponding Relationship
                dname = self.IDD_relations[cname] # the relation name on which cname depends
                self._add_entity(self._E_relations[cname], EntityType.IDD)
                rel = Relationship('ID', RelationshipType.IDD)
                rel.add_participating_entity(cname, '1')
                rel.add_participating_entity(dname, 'm')
                self._add_relationship(rel, RelationshipType.IDD)


        # Incorporate each component relation
        for cname, comp in self._comp_relations.items():

            for comp_name, comp_card in comp:
                R = self._E_relations[comp_name]
                identifier = self._entities[cname].identifier

                if comp_card == Cardinality.many2many:
                    # non-prime attributes of R become m:m attributes
                    # other keys not containing identifier of Entity become 1:m attributes
                    # other primes in the same key with identifier of Entity become m:m attributes
                    m2m_attrs  = set(R.non_primes)
                    for k in R.keys:
                        attr_elem = identifier.elements
                        if not len(k.intersection(attr_elem)):
                            attr_name = '-'.join(k)
                            attribute = Attribute(attr_name, k, Cardinality.one2many)
                            self._entities[cname].add_attribute(attribute)
                        else:
                            m2m_attrs.update(k.difference(attr_elem))
                            
                    for attr in m2m_attrs:
                        # set m:m attributes
                        attribute = Attribute(attr, frozenset([attr]), Cardinality.many2many)
                        self._entities[cname].add_attribute(attribute)

                elif comp_card == Cardinality.many2one:
                    # non-primes attributes of R become m:1 attributes
                    # keys other than identifier become 1:1 attributes
                    for k in R.keys:
                        if k != identifier:
                            attr_name = '_'.join(k)
                            attribute = Attribute(attr_name, k, Cardinality.one2one)
                            self._entities[cname].add_attribute(attribute)
                    for attr in R.non_primes:
                        attribute = Attribute(attr, frozenset([attr]), Cardinality.many2one)
                        self._entities[cname].add_attribute(attribute)

                elif comp_card == Cardinality.one2many:
                    # all the keys of R become 1:m attributes
                    # non_prims other than identifier become m:m attributes
                    for k in R.keys:
                        attr_name = '_'.join(k)
                        attribute = Attribute(attr_name, k, Cardinality.one2many)
                        self._entities[cname].add_attribute(attribute)
                    for attr in R.non_primes:
                        if attr != identifier:
                            attribute = Attribute(attr, frozenset([attr]), Cardinality.many2many)
                            self._entities[cname].add_attribute(attribute)



        # Add correspinding ISA Relationship
        # A ISA B
        for RA1, RB1 in self._ISA_relations.items():
            for RA2, RB2 in self._ISA_relations.items():
                if RA1 == RB2 and RA2 == RB1:
                    # TODO: combine together
                    continue
            # add relationship
            rel = Relationship('ISA', RelationshipType.ISA)
            rel.add_participating_entity(RA1, 'm')
            rel.add_participating_entity(RB1, '1')
            self._add_relationship(rel, RelationshipType.ISA)

        # Find the case of mix enttity and relationship relation
        # that is the nonprimes of a core Entity R have other
        # identifiers of Entities

        # all the entity identifiers
        entity_idr = [ent.identifier.elements for ent in self._entities.values()]
        # sort the entity_idr by lenght
        # this is for the case of IDD entity, which would contain other identifier of entity
        entity_idr = sorted(entity_idr, key=len, reverse=True)
        for cname, R in self._core_relations.items():
            # the keys refed by non prime fkeys
            nonprimes = R.non_primes
            if R.fkeys is not None:
                np_refed_keys = {refed_fk.refed_key: fk for fk, refed_fk in R.fkeys.items() if fk.issubset(nonprimes)}
            else:
                continue
            if np_refed_keys:
                for idr in entity_idr:
                    if idr in np_refed_keys:
                        # R is a mix of entity and another binary relationship
                        # 1) exclude the idr from R
                        # 2) add corresponding relationship

                        # exclude idr from R
                        ent = self._entities[cname]
                        ent.remove_attribute(np_refed_keys[idr])
                        # add corresponding relationship
                        for ent_name, ent in self._entities.items():
                            # find the name of the other entity
                            if ent.identifier.elements == idr:
                                break
                        rel_name = '{}_{}'.format(cname, ent_name)
                        rel = Relationship(rel_name, RelationshipType.regular)
                        rel.add_participating_entity(cname, 'm')
                        rel.add_participating_entity(ent_name, '1')
                        self._add_relationship(rel, RelationshipType.regular)
Example #2
0
    def _identify_relationships(self):
        ''' Identify relationships in relations
            
            Identify relationships by the following steps
            (1) The relation must have more than one disjoint foreign keys  
                which referencing to identifiers of Entities
                Let such fkey be E_fkey
            (2) Examining primary key
                1). if pkey has NO E_fkey
                    pkey is a 1:m attribute of A Relationship, whose Identifier is 
                    other E_fkey in this relation.
                    The attribute will be assign to the Relationship if exists
                    otherwise the relation unassigned
                2). pkey has ONLY E_fkeys
                    if other E_fkeys exists in relation, 
                    then relation represents m : 1 Relationship
                    m: Entities whose identifiers are in pkey
                    1: other E_fkeys
                    If other attributes exists as nonprimes of relation
                    they are m:1 attributes of Relationship
                    If other keys which are not identifiers of Entitiy exists
                    they are 1:1 attributes of Relationship
                3). pkey has E_fkeys and other attributes
                    if all-key relation
                    then this represents m:m Relationship with multivalued attributes
                    else unassigned

        '''

        def examine_pkey(pkey, E_fkeys, R):
            ''' examine pkey, partition pkey into those in E_fkeys and others '''

            idr_in_pkey ={fk: R.fkeys[fk].refed_key for fk in E_fkeys if fk.issubset(pkey)}
            otr_attr = pkey.difference(set([a for fk in idr_in_pkey for a in fk]))
            return idr_in_pkey, otr_attr

        # The following assume all identifiers are disjoint
        # this assumption would be a probem if exists relations referencing to IDD entites
        # find all Identifiers of Entitnies 
        # relaxed universal assumption, all identifiers are unique
        all_identifiers = {ent.identifier.elements: name for name, ent in self._entities.items()}

        # unassigned_attributes
        unassigned_attributes = {}

        for name, R in self._R_relations.items():
            # find the E_fkeys of relation R
            pkey = R.pkey
            E_fkeys = [fk_name for fk_name, fk  in R.fkeys.items() if fk.refed_key in all_identifiers]
            dsj_fkey_count = self._count_disjoint_keys(E_fkeys)

            # 1) more than one fkeys referencing Entitites
            if dsj_fkey_count > 1:
                idr_in_pkey, otr_attr = examine_pkey(pkey, E_fkeys, R) 
                if not idr_in_pkey:
                    # pkey has NO E_fkey
                    R_identifier = frozenset([npa for npa in R.non_primes if frozenset([npa]) in all_identifiers])
                    otr_non_primes = R.non_primes.difference(R_identifier)
                    if not otr_non_primes:
                        unassigned_attributes.update({R_identifer: pkey})

                elif not otr_attr:
                    # 2) only E_fkeys
                    rel = Relationship(name, RelationshipType.regular, frozenset(pkey))
                    for idr, E_name in all_identifiers.items():
                        if idr in idr_in_pkey.values():
                            rel.add_participating_entity(E_name, 'm')

                    for npa in R.non_primes:
                        key = frozenset([npa])
                        if key in all_identifiers:
                            rel.add_participating_entity(all_identifiers[key], '1')
                        else:
                            attr = Attribute(npa, key, Cardinality.many2one)
                            rel.add_attribute(attr)

                    self._add_relationship(rel, RelationshipType.regular)

                else:
                    # 3) pkey has other attributes
                    if R.attributes == pkey:
                        # all key relation
                        rel = Relationship(name, RelationshipType.regular, frozenset(pkey))
                        for idr, E_name in all_identifiers.items():
                            if idr in idr_in_pkey.values():
                                rel.add_participating_entity(E_name, 'm')
                        for npa in otr_attr:
                            key = frozenset([npa])
                            attr = Attribute(npa, key, Cardinality.many2many)
                            rel.add_attribute(attr)

                        self._add_relationship(rel, RelationshipType.regular)
                    else:
                        self._unassigned_relations.append(R)

        # After determine the main relationships 
        # try to comibine with other E_relations left
        self._combine_relationship()