예제 #1
0
    def readdata(
            sourcex_matrix=None,
            sourcey_matrix=None,
            targetx_matrix=None,
            targety_matrix=None,
            src_path=SRC_FILE_PATH,
            tgt_path=TGT_FILE_PATH,
            src_size=None,
            tgt_size=None
    ) -> Tuple[np.matrix, List[int], np.matrix, List[int]]:
        """Load source and target data and encode their labels as indices.

        Features/labels passed in are used as-is; a side whose feature
        matrix is ``None`` is read with ``Classification.read_csv`` instead.
        Every label is then replaced by its position in the list of distinct
        *target* labels (ordered by first appearance).

        Args:
            sourcex_matrix: Source features, or ``None`` to read ``src_path``.
            sourcey_matrix: Source labels; only used when ``sourcex_matrix``
                is given.
            targetx_matrix: Target features, or ``None`` to read ``tgt_path``.
            targety_matrix: Target labels; only used when ``targetx_matrix``
                is given.
            src_path: CSV path for the source data.
            tgt_path: CSV path for the target data.
            src_size: Accepted but not used by this function.
            tgt_size: Accepted but not used by this function.

        Returns:
            ``(source_x, source_label_indices, target_x, target_label_indices)``.

        Raises:
            ValueError: If a source label does not occur among the target
                labels (raised by ``list.index``).
        """
        # Source side: caller-supplied data wins, otherwise read the CSV.
        if sourcex_matrix is not None:
            src_x, src_y = sourcex_matrix, sourcey_matrix
        else:
            src_x, src_y = Classification.read_csv(src_path)

        # Target side: same rule.
        if targetx_matrix is not None:
            tgt_x, tgt_y = targetx_matrix, targety_matrix
        else:
            tgt_x, tgt_y = Classification.read_csv(tgt_path)

        # Distinct target labels in order of first appearance.  Elements are
        # fetched by index because the container type is not visible here.
        known_labels = []
        for pos in range(len(tgt_y)):
            label = tgt_y[pos]
            if label in known_labels:
                continue
            known_labels.append(label)
        print("label list len:", len(known_labels))

        # Encode each label as its position in the target label list.
        src_codes = [
            known_labels.index(src_y[pos]) for pos in range(len(src_y))
        ]
        tgt_codes = [
            known_labels.index(tgt_y[pos]) for pos in range(len(tgt_y))
        ]

        return src_x, src_codes, tgt_x, tgt_codes
예제 #2
0
    def readdata(sourcex_matrix=None,
                 sourcey_matrix=None,
                 targetx_matrix=None,
                 targety_matrix=None,
                 src_path=SRC_FILE_PATH,
                 tgt_path=TGT_FILE_PATH,
                 src_size=None,
                 tgt_size=None):
        """Load source and target data and encode their labels as indices.

        Features/labels passed in are used as-is; a side whose feature
        matrix is ``None`` is read with ``Classification.read_csv`` instead.
        Every label is then replaced by its position in the list of distinct
        *target* labels (ordered by first appearance).

        Args:
            sourcex_matrix: Source features, or ``None`` to read ``src_path``.
            sourcey_matrix: Source labels; only used when ``sourcex_matrix``
                is given.
            targetx_matrix: Target features, or ``None`` to read ``tgt_path``.
            targety_matrix: Target labels; only used when ``targetx_matrix``
                is given.
            src_path: CSV path for the source data.
            tgt_path: CSV path for the target data.
            src_size: Accepted for compatibility; the full source matrix is
                returned regardless of this value.
            tgt_size: Accepted for compatibility; not used.

        Returns:
            ``(source_x, source_label_indices, target_x, target_label_indices)``.

        Raises:
            ValueError: If a source label does not occur among the target
                labels (raised by ``list.index``).
        """
        if sourcex_matrix is None:
            source_x, source_y = Classification.read_csv(src_path, None)
        else:
            source_x, source_y = sourcex_matrix, sourcey_matrix

        if targetx_matrix is None:
            target_x, target_y = Classification.read_csv(tgt_path, size=None)
        else:
            target_x, target_y = targetx_matrix, targety_matrix

        # Distinct target labels in order of first appearance.  Elements are
        # fetched by index because the container type is not visible here.
        labellist = []
        for i in range(len(target_y)):
            label = target_y[i]
            if label not in labellist:
                labellist.append(label)
        print("labellistlen", len(labellist))

        # Both label columns are encoded against the same target-derived
        # list; one pass over the target labels is enough to build it.
        sourcey_label = [
            labellist.index(source_y[i]) for i in range(len(source_y))
        ]
        targety_label = [
            labellist.index(target_y[i]) for i in range(len(target_y))
        ]
        return source_x, sourcey_label, target_x, targety_label