def readdata(
    sourcex_matrix=None,
    sourcey_matrix=None,
    targetx_matrix=None,
    targety_matrix=None,
    src_path=SRC_FILE_PATH,
    tgt_path=TGT_FILE_PATH,
    src_size=None,
    tgt_size=None,
) -> Tuple[np.matrix, List[int], np.matrix, List[int]]:
    """Load source/target data and encode both label sequences as indices.

    Each label is replaced by its position in the list of distinct *target*
    labels, taken in first-occurrence order. The number of distinct target
    labels is printed to stdout.

    Args:
        sourcex_matrix: Source features. When ``None``, features and labels
            are both read via ``Classification.read_csv(src_path)`` and
            ``sourcey_matrix`` is ignored.
        sourcey_matrix: Source labels; only used when ``sourcex_matrix`` is
            given.
        targetx_matrix: Target features. When ``None``, features and labels
            are both read via ``Classification.read_csv(tgt_path)`` and
            ``targety_matrix`` is ignored.
        targety_matrix: Target labels; only used when ``targetx_matrix`` is
            given.
        src_path: CSV path used when no source features are passed in.
        tgt_path: CSV path used when no target features are passed in.
        src_size: Accepted for interface compatibility; not used here.
        tgt_size: Accepted for interface compatibility; not used here.

    Returns:
        ``(source_x, source_label_indices, target_x, target_label_indices)``;
        the feature objects are returned unchanged.

    Raises:
        ValueError: If a source label does not occur among the target labels.
        TypeError: If features are passed in but the matching labels are
            ``None`` (the labels cannot be iterated).
    """
    # Fall back to the CSV files only when no in-memory features were given.
    if sourcex_matrix is None:
        source_x, source_y = Classification.read_csv(src_path)
    else:
        source_x, source_y = sourcex_matrix, sourcey_matrix

    if targetx_matrix is None:
        target_x, target_y = Classification.read_csv(tgt_path)
    else:
        target_x, target_y = targetx_matrix, targety_matrix

    # Distinct target labels in first-occurrence order. A list (not a
    # set/dict) is used on purpose: labels only need to support ``==``,
    # so unhashable label values keep working.
    label_list = []
    for label in target_y:
        if label not in label_list:
            label_list.append(label)
    print("label list len:", len(label_list))

    # Source labels are encoded against the *target* label list, so a label
    # seen only in the source data cannot be mapped.
    source_label_indices = []
    for label in source_y:
        try:
            source_label_indices.append(label_list.index(label))
        except ValueError as err:
            raise ValueError(
                f"source label {label!r} does not occur among the target labels"
            ) from err

    target_label_indices = [label_list.index(label) for label in target_y]

    return source_x, source_label_indices, target_x, target_label_indices
def readdata(sourcex_matrix=None, sourcey_matrix=None, targetx_matrix=None,
             targety_matrix=None, src_path=SRC_FILE_PATH,
             tgt_path=TGT_FILE_PATH, src_size=None, tgt_size=None):
    """Load source/target data and encode both label sequences as indices.

    Each label is replaced by its position in the list of distinct *target*
    labels, taken in first-occurrence order. The number of distinct target
    labels is printed to stdout.

    Args:
        sourcex_matrix: Source features. When ``None``, features and labels
            are both read via ``Classification.read_csv(src_path, None)`` and
            ``sourcey_matrix`` is ignored.
        sourcey_matrix: Source labels; only used when ``sourcex_matrix`` is
            given.
        targetx_matrix: Target features. When ``None``, features and labels
            are both read via ``Classification.read_csv(tgt_path, size=None)``
            and ``targety_matrix`` is ignored.
        targety_matrix: Target labels; only used when ``targetx_matrix`` is
            given.
        src_path: CSV path used when no source features are passed in.
        tgt_path: CSV path used when no target features are passed in.
        src_size: Accepted for interface compatibility; not applied to the
            returned data.
        tgt_size: Accepted for interface compatibility; not applied to the
            returned data.

    Returns:
        A tuple ``(source_x, source_label_indices, target_x,
        target_label_indices)``; the feature objects are returned unchanged
        and the two index lists hold ``int`` positions into the distinct
        target-label list.

    Raises:
        ValueError: If a source label does not occur among the target labels.
        TypeError: If features are passed in but the matching labels are
            ``None`` (the labels cannot be iterated).
    """
    # Fall back to the CSV files only when no in-memory features were given.
    if sourcex_matrix is None:
        source_x, source_y = Classification.read_csv(src_path, None)
    else:
        source_x, source_y = sourcex_matrix, sourcey_matrix

    # NOTE(review): the features are deliberately not sliced by src_size
    # here. The result is returned in full, and 2-D slicing would reject
    # plain list-of-lists input.

    if targetx_matrix is None:
        target_x, target_y = Classification.read_csv(tgt_path, size=None)
    else:
        target_x, target_y = targetx_matrix, targety_matrix

    # Distinct target labels in first-occurrence order. A list (not a
    # set/dict) is used on purpose: labels only need to support ``==``,
    # so unhashable label values keep working.
    distinct_labels = []
    for label in target_y:
        if label not in distinct_labels:
            distinct_labels.append(label)
    print("labellistlen", len(distinct_labels))

    # Source labels are encoded against the *target* label list, so a label
    # seen only in the source data cannot be mapped.
    source_label_indices = []
    for label in source_y:
        try:
            source_label_indices.append(distinct_labels.index(label))
        except ValueError as err:
            raise ValueError(
                f"source label {label!r} does not occur among the target labels"
            ) from err

    target_label_indices = [distinct_labels.index(label) for label in target_y]

    return source_x, source_label_indices, target_x, target_label_indices