Example #1
0
    def set_filepaths(self, filepaths):
        """
        Record, once, where the pkl file of this object is saved.

        The paths are handed to ``DbHandler.update_doc`` under the "filepaths" key and
        then stored on the instance.

        :param filepaths: list of dictionaries, each dictionary specifies where pkl file is saved.
            Currently supports below:
            [
                {'home': home, 'project':project_name},
                {'bucket': aws_bucket, 'project':project_name}
            ]
            A falsy value (e.g. None or an empty list) skips the type checks and is
            stored as is.
        :raises AttributeError: if the object has no obj_id yet, or if its file paths
            are already set.
        :raises TypeError: if a non-empty filepaths is not a list, or if one of its
            entries is not a dictionary.
        :return: None
        """
        if self.obj_id is None:
            # The two halves of this message used to be joined without a space
            # ("yet,so it ..."); the trailing space below fixes that.
            raise AttributeError(
                "The object doesn't have an obj_id, which means it's not saved in db yet, "
                "so it should not be saved in storage either."
            )

        if self.filepaths:
            raise AttributeError(
                "The set_filepaths method in Base does not allow resetting the file paths."
            )

        # Only a truthy value is validated; falsy values pass through untouched.
        if filepaths:
            if not isinstance(filepaths, list):
                raise TypeError(
                    "Currently the collection of the file paths has to be of the list type"
                )
            for path in filepaths:
                if not isinstance(path, dict):
                    raise TypeError(
                        "Currently the file paths have to be of the dictionary type"
                    )

        # Update the db first so the instance attribute is only set once that call returned.
        dh = DbHandler()
        dh.update_doc(self, {"filepaths": filepaths})
        self.__filepaths = filepaths
Example #2
0
    def save_db(self, db):
        """
        Set the db on this object and register it through ``DbHandler.init_doc``.

        :param db: dict
        :return: None; the value returned by ``init_doc`` is stored in ``self.obj_id``.
        """
        self.set_db(db)
        self.obj_id = DbHandler().init_doc(self)
Example #3
0
    def l_prepare_locate(self, l_node):
        """
        Collect the ids of the documents found for l_node.l_transform that are not matched yet.

        When l_node.lab_fed has no obj_id, it is located first with ``self.l_locate``.

        :param l_node: node providing lab_fed, l_transform and pipe_init.db
        :return: list of the "_id" values of the found documents that are not in self.matched
        """
        if not l_node.lab_fed.obj_id:
            self.l_locate(l_node.lab_fed)

        found_docs = DbHandler().search_by_essentials(l_node.l_transform,
                                                      l_node.pipe_init.db)

        unmatched_ids = []
        for found in found_docs:
            if found["_id"] not in self.matched:
                unmatched_ids.append(found["_id"])
        return unmatched_ids
Example #4
0
    def identify_label(l_node, lst_l_transform):
        """
        Search the db for Label documents matching l_node under each given l_transform.

        For every entry of lst_l_transform a temporary Label (without values) is built
        and looked up with ``DbHandler.search_by_essentials``.

        :param l_node: node providing pipe_init.frame, pipe_init.db and lab_fed.obj_id
        :param lst_l_transform: iterable of l_transform values to try
        :return: list of all documents found; documents with non-empty "filepaths"
            come first, the relative order is otherwise kept (stable sort)
        """
        frame = l_node.pipe_init.frame
        fed_label_id = l_node.lab_fed.obj_id
        handler = DbHandler()

        found_docs = []
        for candidate in lst_l_transform:
            probe = Label(frame=frame,
                          l_transform=candidate,
                          raw_y=fed_label_id,
                          values=None)
            found_docs += handler.search_by_essentials(probe, l_node.pipe_init.db)

        # False sorts before True, so documents that already have filepaths lead.
        found_docs.sort(key=lambda doc: not doc["filepaths"])
        return found_docs
Example #5
0
    def f_prepare_locate(self, f_node):
        """
        Locate the dependencies of f_node, then collect the unmatched transform document ids.

        Every node of f_node.lst_fed without an obj_id is located with ``self.f_locate``,
        and f_node.l_node is located through an ``LConnector`` built from self.l_matched.

        :param f_node: node providing lst_fed, l_node, f_transform and pipe_init.db
        :return: list of the "_id" values of the documents found for f_node.f_transform
            that are not in self.matched
        """
        for fed_node in f_node.lst_fed:
            if fed_node.obj_id is None:
                self.f_locate(fed_node)

        LConnector(self.l_matched).l_locate(f_node.l_node)

        found_docs = DbHandler().search_by_essentials(f_node.f_transform,
                                                      f_node.pipe_init.db)

        unmatched_ids = []
        for found in found_docs:
            if found["_id"] not in self.matched:
                unmatched_ids.append(found["_id"])
        return unmatched_ids
Example #6
0
    def identify_feature(f_node, lst_f_transform):
        """
        Search the db for Feature documents matching f_node under each given f_transform.

        For every entry of lst_f_transform a temporary Feature (without values) is built
        and looked up with ``DbHandler.search_by_essentials``.

        :param f_node: node providing pipe_init.frame, pipe_init.db, lst_fed and l_node.obj_id
        :param lst_f_transform: iterable of f_transform values to try
        :return: list of all documents found; documents with non-empty "filepaths"
            come first, the relative order is otherwise kept (stable sort)
        """
        frame = f_node.pipe_init.frame
        fed_ids = []
        for fed in f_node.lst_fed:
            fed_ids.append(fed.obj_id)
        handler = DbHandler()

        found_docs = []
        for candidate in lst_f_transform:
            probe = Feature(frame=frame,
                            f_transform=candidate,
                            lst_fed=fed_ids,
                            label=f_node.l_node.obj_id,
                            values=None)
            found_docs += handler.search_by_essentials(probe, f_node.pipe_init.db)

        # False sorts before True, so documents that already have filepaths lead.
        found_docs.sort(key=lambda doc: not doc["filepaths"])
        return found_docs
Example #7
0
    def search_for_scheme(self, db):
        """
        Look this object up in the db and load the first match from file.

        :param db: passed to ``DbHandler.search_by_essentials`` together with self
        :return: the object returned by ``IOHandler.load_obj_from_file`` for the first
            document found, or None when the search yields nothing
        """
        matches = DbHandler().search_by_essentials(self, db)
        if not matches:
            return None

        first = matches[0]
        obj_id, filepaths = first["_id"], first["filepaths"]
        element = self.decide_element()
        return IOHandler().load_obj_from_file(obj_id, element, filepaths)
Example #8
0
    def l_locate(self, l_node, save_obtained=True):
        """
        Resolve l_node against the db, materializing its label when no stored file is recorded.

        l_node.obj_id and l_node.filepaths may be updated in place.

        :param l_node: LNode to locate
        :param save_obtained: when True, objects materialized by this call are saved with
            ``save_file`` to the filepaths of l_node.pipe_init
        :raises TypeError: if l_node is not an LNode
        :return: (label, l_transform) when they were materialized by this call,
            otherwise (None, None)
        """
        if not isinstance(l_node, LNode):
            raise TypeError(
                "The parameter l_node should be of the type LNode.")

        db = l_node.pipe_init.db
        filepaths = l_node.pipe_init.filepaths

        # --- l_node already refers to a document -------------------------------
        if l_node.obj_id is not None:
            record = DbHandler().search_by_obj_id(obj_id=l_node.obj_id,
                                                  element="Label",
                                                  db=db)
            if record["filepaths"]:
                # Use the paths of the document rather than those of pipe_init, to
                # prevent potential errors from the two being different.
                stored_paths = record["filepaths"]
                if l_node.filepaths is None:
                    l_node.filepaths = stored_paths
                return None, None

            label, l_transform = self.materialize_with_existing_doc(
                doc=record, l_node=l_node)
            if save_obtained:
                label.save_file(filepaths)
                l_transform.save_file(filepaths)
                l_node.filepaths = filepaths
            return label, l_transform

        # --- l_node has no obj_id yet: look for matching documents --------------
        candidates = self.identify_label(l_node, self.l_prepare_locate(l_node))

        if not candidates:
            # Nothing matches in the db.
            label, l_transform = self.set_off_and_record(l_node, db)
            if save_obtained:
                l_transform.save_file(filepaths)
                label.save_file(filepaths)
                l_node.obj_id = label.obj_id
                l_node.filepaths = label.filepaths
            else:
                l_node.obj_id = label.obj_id
            return label, l_transform

        best = candidates[0]
        if best["filepaths"]:
            # A document with recorded filepaths exists: only point l_node at it.
            l_node.obj_id = best["_id"]
            l_node.filepaths = best["filepaths"]
            return None, None

        if not save_obtained:
            l_node.obj_id = best["_id"]
            return None, None

        label, l_transform = self.materialize_with_existing_doc(
            doc=best, l_node=l_node)
        l_transform.save_file(filepaths)
        label.save_file(filepaths)
        l_node.obj_id = best["_id"]
        l_node.filepaths = filepaths
        return label, l_transform
Example #9
0
    def f_locate(self, f_node, save_obtained=True):
        """
        Resolve f_node against the db, materializing its feature when no stored file is recorded.

        f_node.obj_id and f_node.filepaths may be updated in place.

        :param f_node: FNode to locate
        :param save_obtained: when True, objects materialized by this call are saved with
            ``save_file`` to the filepaths of f_node.pipe_init
        :raises TypeError: if f_node is not an FNode, or if f_node has no obj_id and
            f_node.l_node is not an LNode
        :return: (feature, f_transform) when they were materialized by this call,
            otherwise (None, None)
        """
        if not isinstance(f_node, FNode):
            raise TypeError("The parameter f_node should of the type FNode.")

        feature_obtained, f_trans_obtained = None, None
        db = f_node.pipe_init.db
        filepaths = f_node.pipe_init.filepaths

        if f_node.obj_id is None:
            if not isinstance(f_node.l_node, LNode):
                raise TypeError(
                    "The attribute f_node.l_node should be of the type LNode")

            lst_f_transform = self.f_prepare_locate(f_node)
            all_docs = self.identify_feature(f_node, lst_f_transform)

            if all_docs:
                # identify_feature puts documents with filepaths first, so the first
                # document is the best candidate.
                doc = all_docs[0]
                if doc["filepaths"]:
                    # update f_node
                    f_node.obj_id = doc["_id"]
                    f_node.filepaths = doc["filepaths"]

                elif save_obtained:
                    feature, f_transform = self.materialize_with_existing_doc(
                        f_node=f_node, doc=doc)

                    # save obtained
                    f_transform.save_file(filepaths)
                    feature.save_file(filepaths)

                    # update f_node
                    f_node.obj_id = doc["_id"]
                    f_node.filepaths = filepaths

                    # for return
                    feature_obtained, f_trans_obtained = feature, f_transform

                else:
                    # update f_node
                    f_node.obj_id = doc["_id"]

            else:
                # Nothing matches in the db.
                feature, f_transform = self.set_off_and_record(f_node, db)

                if save_obtained:
                    # save obtained
                    f_transform.save_file(filepaths)
                    feature.save_file(filepaths)

                    # update f_node
                    f_node.obj_id = feature.obj_id
                    f_node.filepaths = feature.filepaths
                else:
                    # update f_node
                    f_node.obj_id = feature.obj_id

                # for return
                feature_obtained, f_trans_obtained = feature, f_transform

        else:
            dh = DbHandler()
            doc = dh.search_by_obj_id(obj_id=f_node.obj_id,
                                      element="Feature",
                                      db=db)

            if doc["filepaths"]:
                # Prevent potential errors resulting from different filepaths in the
                # doc and in pipe_init: the paths recorded in the doc win.
                doc_filepaths = doc["filepaths"]

                # update f_node
                # (this used to assign the pipe_init filepaths and left doc_filepaths
                # unused, unlike the same branch of l_locate)
                if f_node.filepaths is None:
                    f_node.filepaths = doc_filepaths

                # Nothing is "obtained" in this branch, so (None, None) is returned.

            elif save_obtained:
                feature, f_transform = self.materialize_with_existing_doc(
                    f_node=f_node, doc=doc)

                # save obtained
                f_transform.save_file(filepaths)
                feature.save_file(filepaths)

                # update f_node
                f_node.filepaths = filepaths

                # for return
                feature_obtained, f_trans_obtained = feature, f_transform
            else:
                feature, f_transform = self.materialize_with_existing_doc(
                    f_node=f_node, doc=doc)

                # for return
                feature_obtained, f_trans_obtained = feature, f_transform

        return feature_obtained, f_trans_obtained