def get_pred_objs(self, star):
        """
        Map every constant predicate of a BGP/star-shaped subquery to its object.

        Triple patterns whose predicate is not constant are skipped. When the
        same predicate occurs more than once, the object of the last matching
        triple pattern wins.

        :param star: list of triple patterns
        :return: dict whose keys are the ``utils.getUri`` form of the predicate
                 with its first and last character removed, and whose values
                 are the ``utils.getUri`` form of the object when the object is
                 constant, or the object's ``name`` otherwise
        """

        pred_objs = {}
        for tr in star:
            if not tr.predicate.constant:
                continue

            pred_uri = utils.getUri(tr.predicate, self.prefixes)[1:-1]
            if tr.theobject.constant:
                pred_objs[pred_uri] = utils.getUri(tr.theobject, self.prefixes)
            else:
                pred_objs[pred_uri] = tr.theobject.name

        return pred_objs
    def decompose_bgp(self, stars, bgp_preds):
        """
        Annotate every star of a BGP with its candidate molecules and datasources.

        For each star (visited in sorted name order) the candidate molecules
        are taken from ``checkRDFTypeStatemnt`` when it yields any, otherwise
        from ``self.config.find_rdfmt_by_preds``. The candidates are then
        narrowed by ``prune`` and the datasources of the surviving molecules
        are collected.

        :param stars: dict mapping a star name to its list of triple patterns
        :param bgp_preds: passed through unchanged to ``getMTsConnection``
        :return: tuple ``(bgpstars, star_conn, mt_conn)`` where ``bgpstars``
                 maps each star name to a dict with the keys ``'triples'``,
                 ``'predicates'``, ``'rdfmts'`` and ``'datasources'``;
                 ``([], [], [])`` as soon as one star ends up without any
                 datasource
        """
        bgpstars = {}
        mtres = {}
        relevant_mts = {}
        for name in sorted(stars.keys()):
            triples = stars[name]
            pred_objs = self.get_pred_objs(triples)
            ordered_triples = sorted(triples)

            # Explicit type statements take precedence over predicate lookup.
            rdfmts = self.checkRDFTypeStatemnt(triples)
            if not rdfmts:
                rdfmts = self.config.find_rdfmt_by_preds(pred_objs)

            mt_names = list(rdfmts.keys())
            bgpstars[name] = {
                'triples': ordered_triples,
                'predicates': pred_objs,
                'rdfmts': mt_names,
            }
            mtres[name] = mt_names
            relevant_mts.update(rdfmts)

        star_conn = self.getStarsConnections(stars)
        mt_conn = self.getMTsConnection(mtres, bgp_preds, relevant_mts)
        res = self.prune(star_conn, mt_conn, mtres, stars, relevant_mts)

        for name, molecules in res.items():
            star_info = bgpstars[name]
            star_info['rdfmts'] = molecules

            datasources = {}
            for mt in molecules:
                mt_sources = self.config.metadata[mt].datasources
                for ds in mt_sources:
                    ds_preds = mt_sources[ds]
                    shared = list(
                        set(star_info['predicates']).intersection(ds_preds))
                    per_mt = datasources.setdefault(ds, {}).setdefault(mt, [])
                    if shared:
                        per_mt.extend(shared)
                        continue

                    # No overlap: the datasource is still kept, with all of
                    # the predicates it offers for this molecule.
                    per_mt.extend(ds_preds)
                    if not star_info['predicates']:
                        # The star had no constant predicate; fall back to
                        # keying the predicate:object map by predicate name.
                        star_info['predicates'] = {
                            tr.predicate.name:
                            (utils.getUri(tr.theobject, self.prefixes)
                             if tr.theobject.constant else tr.theobject.name)
                            for tr in stars[name]
                        }

            if not datasources:
                return [], [], []
            star_info['datasources'] = datasources

        return bgpstars, star_conn, mt_conn
Exemple #3
0
    def get_preds(self, star):
        """
        Collect the constant predicates of a BGP/star-shaped subquery.

        :param star: list of triple patterns
        :return: list with one entry per triple pattern whose predicate is
                 constant, in input order and with duplicates kept; each entry
                 is the ``utils.getUri`` form of the predicate with its first
                 and last character removed
        """

        constant_preds = []
        for tr in star:
            if tr.predicate.constant:
                uri = utils.getUri(tr.predicate, self.prefixes)
                constant_preds.append(uri[1:-1])

        return constant_preds
Exemple #4
0
    def checkRDFTypeStatemnt(self, ltr):
        """
        Look up the molecules named by the type statements of a star.

        :param ltr: list of triple patterns
        :return: dict mapping each type URI found in ``self.config.metadata``
                 to its metadata entry; empty when ``getRDFTypeStatement``
                 yields nothing or none of the types is known
        """
        type_uris = (utils.getUri(t.theobject, self.prefixes)[1:-1]
                     for t in self.getRDFTypeStatement(ltr))

        return {uri: self.config.metadata[uri]
                for uri in type_uris
                if uri in self.config.metadata}
Exemple #5
0
    def prune(self, star_conn, res_conn, selectedmolecules, stars, relevant_mts):
        """
        Narrow the candidate molecules of each star using star connections.

        :param star_conn: dict keyed by star name; star ``c`` is treated as
                          connected to star ``s`` when
                          ``s in star_conn[c]['SO']``
        :param res_conn: dict mapping a molecule to the collection of
                         molecules it is connected to
        :param selectedmolecules: dict mapping a star name to its list of
                                  candidate molecules
        :param stars: dict mapping a star name to its triple patterns
        :param relevant_mts: dict mapping a molecule to an object whose
                             ``predicates`` mapping holds entries exposing
                             ``predicate`` and ``ranges``
        :return: dict mapping every star name to a duplicate-free list of
                 molecules (order not guaranteed); a star whose candidates
                 were all pruned falls back to its full candidate list
        """
        # A star with exactly one candidate is already decided.
        res = {s: (list(mols) if len(mols) == 1 else [])
               for s, mols in selectedmolecules.items()}
        if all(len(mols) == 1 for mols in selectedmolecules.values()):
            return res

        def linked_stars(s):
            return [c for c in star_conn if s in star_conn[c]['SO']]

        # check predicate level connections
        newfilteredonly = {}
        for s in res:
            for c in linked_stars(s):
                # Predicates of s whose object is the connected star c.
                connectingtp = {utils.getUri(tp.predicate, self.prefixes)[1:-1]
                                for tp in stars[s] if tp.theobject.name == c}
                for m in selectedmolecules[s]:
                    mt_preds = relevant_mts[m].predicates
                    srange = [p for r in mt_preds
                              for p in mt_preds[r].ranges
                              if mt_preds[r].predicate in connectingtp]
                    srange = set(srange).intersection(selectedmolecules[c])
                    merged = newfilteredonly.setdefault(c, [])
                    merged.extend(srange)
                    newfilteredonly[c] = list(set(merged))

        # Star pairs are tracked as tuples. Concatenating the names (s + c)
        # lets distinct pairs collide, e.g. ("a", "bc") and ("ab", "c").
        already_checked = set()
        for s in res:
            for c in linked_stars(s):
                if (s, c) in already_checked:
                    continue
                already_checked.update(((s, c), (c, s)))

                if c not in newfilteredonly:
                    newfilteredonly[c] = list(selectedmolecules[c])
                c_newfilter = list(newfilteredonly[c])
                if s not in newfilteredonly:
                    newfilteredonly[s] = list(selectedmolecules[s])
                s_newfilter = list(newfilteredonly[s])

                # Only the molecules of s are pruned against those of c; the
                # reverse direction is not applied.
                for m in s_newfilter:
                    con = res_conn[m]
                    if len(con) == 0:
                        continue
                    if set(con).isdisjoint(c_newfilter):
                        newfilteredonly[s].remove(m)

        for s, mols in newfilteredonly.items():
            res[s] = list(set(mols))

        for s in res:
            if len(res[s]) == 0:
                # Everything was pruned: keep all original candidates.
                res[s] = selectedmolecules[s]
            res[s] = list(set(res[s]))
        return res