Exemple #1
0
 def _export(self,sect:int,conn:O[Conn],ifinal:Instance) -> L[CQLSection]:
     '''Export an instance to a DB connection'''
     if not conn: return []
     drop   = Exec('cmd_drop',conn,'DROP DATABASE IF EXISTS `%s`'%conn.db,db=False)
     create = Exec('cmd_create',conn,"CREATE SCHEMA `%s`"%conn.db,db=False)
     merge  = Export('cmd_merged',conn,ifinal)
     return  [Title(4, name='Export to database'), drop, create, merge]
Exemple #2
0
    def sections(self, src_conn : Input, tar_conn : Input, merged_conn : Conn)-> L[CQLSection]:
        '''
        Assemble the ordered CQL sections that migrate source data into the
        target schema.

        Args:
            src_conn:    input handed to ``self._inst`` to create the source instance.
            tar_conn:    input handed to ``self._inst`` to create the target instance.
            merged_conn: connection handed to ``self._export`` for the final instance.

        Returns:
            The set-up and schema declarations, the sections creating the source
            and target instances, the query / mapping / instance steps of the
            migration, the record-linkage step and finally whatever
            ``self._export`` returns for the final instance.
        '''
        s_inter  = self._inter() # intermediate schema

        # Copy of the target whose `pes` and `oes` sets are emptied
        # (presumably the equations/constraints, hence 'tar_nc' -- confirm).
        starnc   = self.tar.copy()
        starnc.pes = set()
        starnc.oes = set()
        s1       = self.overlap.add_sql_attr(self.src) # only source has 'extra' attrs from landing, possibly

        src      = s1.schema('src',self._ty) # this is a CQL schema, lower level than the CQL interface schema
        tar      = self.tar.schema('tar',self._ty)
        tarnc    = starnc.schema('tar_nc',self._ty)
        inter    = s_inter.schema('inter',self._ty)
        l1,l2    = self._lands()
        isrc,src_sects = self._inst(1,'src',src_conn,s1,src,l1)
        itar,tar_sects = self._inst(2,'tar',tar_conn,self.tar,tar,l2)

        Q  = Query('Q',src,inter,self.qobjs(s_inter))

        # Looked up once instead of once per entity / attribute / FK.
        entities = self.overlap.entities
        patheqs  = self.overlap.patheqs_simple

        # One mapping object per intermediate entity. The target entity is the
        # overlap's counterpart when one is registered, otherwise the entity of
        # the same name. Only attributes / FKs that have a simple path equation
        # are mapped; the dict keys of attrs/fks are not needed, so iterate
        # over the values only.
        maps = [MapObj(src   = e.ent(),
                       tar   = self.tar[entities[en] if en in entities else en].ent(),
                       attrs = {a.attr(): patheqs[a].path()
                                for a in e.attrs.values()
                                if a in patheqs},
                       fks   = {f.fk(): patheqs[f].path()
                                for f in e.fks.values()
                                if f in patheqs})
                for en,e in s_inter.entities.items()]

        M      = MapLit('M',inter,tarnc,maps=maps)
        ialt   = EvalInstance('i_altered',Q,isrc)          # source data seen through Q
        imap   = MapInstance('i_mapped','sigma',M,ialt)    # pushed along M into 'tar_nc'
        icon   = DelInstance('i_constrained',imap,tar)     # brought into the full target schema
        imrg   = CoProdInstance('i_merged',icon,itar,tar)  # combined with the existing target data
        final  = self.tar.quotient('i_final',imrg)

        return [Title(0, 1, 'Set-up'), self.default,  self._ty,
                Title(0, 2, 'Declare schemas'), src, tar, tarnc, inter,
                Title(1, name = 'Create source instance')] + src_sects + [
                Title(2, name = 'Create target instance')] + tar_sects + [
                Title(3, name = "Data migration"),
                Title(3, 1, "Query which adds extra information when eval'd"),  Q,
                Title(3, 2, 'Mapping'),  M,
                Title(3, 3, 'Move instance data from src to target'), ialt, imap, icon, imrg,
                Title(3, 4, 'Record linkages'), final,
                ] + self._export(4,merged_conn,final)
Exemple #3
0
    def _from_db(self,
                 num  : int,
                 name : str,
                 s    : Schema,
                 ss   : CQLSchema,
                 land : Land,
                 conn : Conn
                 ) -> T[Instance, L[CQLSection]]:
        '''
        Given a DB connection for src or tar, produce the CQL sections that
        yield an instance of the desired schema and are built so as NOT to fail.

        Nothing is assumed about the stored data respecting FK constraints or
        data integrity constraints. The data is first landed in a pseudo-schema
        in which FKs are plain attributes. The chase then turns it into an
        instance with valid FKs: a NULL or dangling FK reference causes a new
        record (with labeled NULLs) to be generated, which can in turn cause
        further null records to be generated. Finally a delete_cascade drops
        the records that violate the data integrity constraints.

        Args:
            num:  section number used for the titles of the returned sections.
            name: label woven into the name of every generated CQL object.
            s:    schema the helper schemas and FK constraints are derived from.
            ss:   CQL schema of the returned instance.
            land: object whose ``inst`` method lands the raw data.
            conn: DB connection the data is landed from.

        Returns:
            A pair: the resulting instance, and the list of sections that
            declares everything needed to build it.
        '''
        # Helper schemas derived from `s`
        core_schema = s.schema('s_%s_core'%name,self._ty,uid=True,fks=False,pe=False)
        raw_schema  = s.schema('s_%s_raw'%name,self._ty,uid=True,fks='attr',pe=False)
        fk_schema   = s.schema(name+'_fk',self._ty,uid=True,pe=False)
        fk_cons     = s.fk_constraints('con_fk_'+name,raw_schema)

        # Mappings
        core_id = IdMap('id_core_'+name,core_schema)
        fk_map  = self._land_migrate('M_fks_'+name,raw_schema,fk_schema,core_id)

        # Land -> chase -> move along the mapping -> restrict to the real schema
        landed   = land.inst('i_%s_raw'%name,raw_schema,conn)
        chased   = ChaseInstance('i_chased_'+name,fk_cons,landed)
        with_fks = MapInstance('i_fk_'+name,'sigma',fk_map,chased)
        result   = DelInstance('i_'+name,with_fks,ss)

        sections = [core_schema, raw_schema, fk_schema]
        sections += [Title(num,1,'Mappings'), core_id, fk_map]
        sections += [Title(num,2,'Constraints'), fk_cons]
        sections += [Title(num,3,'Land data'), landed]
        sections += [Title(num,4,'Move "unconstrained" instance data into real schema'),
                     chased, with_fks, result]

        return result, sections
Exemple #4
0
    def sections(self, src_conn : Input, tar_conn : Input, merged_conn : Conn = None)-> L[CQLSection]:
        '''
        Assemble the ordered CQL sections that integrate the source and target
        data into one merged schema.

        Args:
            src_conn:    input handed to ``self._inst`` to create the source instance.
            tar_conn:    input handed to ``self._inst`` to create the target instance.
            merged_conn: connection handed to ``self._export`` for the final
                         instance (defaults to None).

        Returns:
            The set-up and schema declarations, the sections creating the source
            and target instances, the queries, the schema-merging objects, the
            instance-merging steps, the record-linkage step and finally
            whatever ``self._export`` returns for the final instance.
        '''
        # Extra attributes may have been added during landing
        src_model = self.overlap.add_sql_attr(self.src)
        tar_model = self.overlap.add_sql_attr(self.tar,src=False)

        src_schema = src_model.schema('src',self._ty,)
        tar_schema = tar_model.schema('tar',self._ty)

        # The same schemas built with pe=False ('no_cons')
        src_nocons = src_model.schema('src_no_cons',self._ty,pe=False)
        tar_nocons = tar_model.schema('tar_no_cons',self._ty,pe=False)

        # Additions that the CQL queries introduce on each side; each dict is
        # shared by the regular and the 'no_cons' variant of the schema.
        src_extra = {'typeside' : 'ty',
                     'entities' : [e.ent.ent() for e in self.overlap.ne1.values()],
                     'attrs'    : [a.attr.attr() for a in self.overlap.na1],
                     'fks'      : [f.fk.fk() for f in self.overlap.nf1]}
        tar_extra = {'typeside' : 'ty',
                     'entities' : [e.ent.ent() for e in self.overlap.ne2.values()],
                     'attrs'    : [a.attr.attr() for a in self.overlap.na2],
                     'fks'      : [f.fk.fk() for f in self.overlap.nf2]}

        src_ext = CQLSchema('src2',imports = [src_schema],**src_extra)
        tar_ext = CQLSchema('tar2',imports = [tar_schema],**tar_extra)

        src_ext_nocons = CQLSchema('src2_no_cons',imports = [src_nocons],**src_extra)
        tar_ext_nocons = CQLSchema('tar2_no_cons',imports = [tar_nocons],**tar_extra)

        src_land,tar_land  = self._lands()
        src_inst,src_sects = self._inst(1,'src',src_conn,src_model,src_schema,src_land)
        # NOTE(review): the target is landed with self.tar although its CQL
        # schema comes from the add_sql_attr result (the source side passes its
        # add_sql_attr result) -- confirm this asymmetry is intended.
        tar_inst,tar_sects = self._inst(2,'tar',tar_conn,self.tar,tar_schema,tar_land,)

        src_query = Query('Q1',src_schema,src_ext,self.add_query_objs(src_model))
        tar_query = Query('Q2',tar_schema,tar_ext,self.add_query_objs(tar_model,src=False))

        # Every newly introduced entity is equated with itself; the overlap's
        # own entity equations are applied afterwards and win on a key clash.
        new_entities = set(self.overlap.ne1.values()) | set(self.overlap.ne2.values())
        entity_eqs   = {e.ent.name : e.ent.name for e in new_entities}
        entity_eqs.update(self.overlap.entity_eqs())

        rewriter = Rewrite(src_ext.rewrite_dict(self.overlap.entities,tar_ext))

        path_equations = [PathEQ(lhs,rhs).patheq(rewrite=rewriter)
                          for lhs,rhs in self.overlap.patheqs.items()]

        # Keyword arguments shared by both colimits; note the entity equations
        # are passed inverted (value -> key).
        colimit_kwargs = {'ent_eqs'  : {val:key for key,val in entity_eqs.items()},
                          'path_eqs' : path_equations}

        merged = SchemaColimitQuotient(name='merged_',
                                       s1=tar_ext,s2=src_ext,
                                       **colimit_kwargs)

        merged_nocons_raw = SchemaColimitQuotient(name='merged_no_cons_',
                                                  s1=tar_ext_nocons,s2=src_ext_nocons,
                                                  **colimit_kwargs)

        merged_nocons = self.overlap.modify(name = 'merged_no_cons',sc = merged_nocons_raw)

        merged_schema        = GetSchema('s_merged',merged)
        merged_schema_nocons = GetSchema('s_merged_no_cons',merged_nocons)

        src_ext_inst = EvalInstance('i_src2',src_query,src_inst)
        tar_ext_inst = EvalInstance('i_tar2',tar_query,tar_inst)

        src_incl = Include('P1',src_ext_nocons,src_ext)
        tar_incl = Include('P2',tar_ext_nocons,tar_ext)
        src_into = GetMapping('M1',merged_nocons,src_ext_nocons)
        tar_into = GetMapping('M2',merged_nocons,tar_ext_nocons)

        src_inst_nocons = MapInstance('i_src_no_cons','delta',src_incl,src_ext_inst)
        tar_inst_nocons = MapInstance('i_tar_no_cons','delta',tar_incl,tar_ext_inst)
        src_moved       = MapInstance('srctmp','sigma',src_into,src_inst_nocons)
        tar_moved       = MapInstance('tartmp','sigma',tar_into,tar_inst_nocons)
        merged_inst_nocons = CoProdInstance('i_merged_no_con',src_moved,tar_moved,merged_schema_nocons)
        merged_inst     = DelInstance('i_merged',merged_inst_nocons,merged_schema)
        final           = self.tar.quotient('i_final',merged_inst)

        # Assemble the document in reading order
        out = [Title(0,0,'Set-up'), self.default, self._ty,
               Title(1,0,'Declare schemas'),
               src_schema, tar_schema, src_ext, tar_ext,
               src_nocons, src_ext_nocons, tar_nocons, tar_ext_nocons,
               Title(1, name = 'Create source instance')]
        out += src_sects
        out += [Title(2, name = 'Create target instance')]
        out += tar_sects
        out += [Title(3, name = "Data integration"),
                Title(3,1,"Queries which add extra information when eval'd"),
                src_query, tar_query, src_ext_inst, tar_ext_inst,
                Title(3,2,'Merging of schemas'),
                merged, merged_nocons_raw, merged_nocons,
                merged_schema, merged_schema_nocons,
                src_incl, tar_incl, src_into, tar_into,
                Title(3,3,'Merge instance data'),
                src_inst_nocons, tar_inst_nocons, src_moved, tar_moved,
                merged_inst_nocons, merged_inst,
                Title(3, 4, 'Record linkages'), final]
        out += self._export(4,merged_conn,final)
        return out