def __init__(self, path, list_path, aa_batchsize=800):
        """Collect the sample file names and group them into batches.

        Args:
            path: Mapping of dataset locations. Only the ``'knn_75_batch'``
                entry is read; it is stored as ``self.input_path``.
            list_path: Text file with one file name per line. When ``None``,
                the entries of ``self.input_path`` are listed instead.
            aa_batchsize: Forwarded unchanged to ``rep_utils.group_files``.

        Raises:
            KeyError: If ``path`` has no ``'knn_75_batch'`` entry, or a file
                name has no record in ``./data/nr40/len.txt``.
            IndexError: If a recorded length is 2000 or more.
        """
        self.input_path = path['knn_75_batch']
        if list_path is None:
            # `path` is a mapping, so the directory to enumerate is the
            # entry resolved above, not `path` itself.
            self.file_list = os.listdir(self.input_path)
        else:
            with open(list_path, 'r') as f:
                self.file_list = f.read().splitlines()

        # Every record of len.txt has the form "<filename>:<length>". The
        # element after the final newline is discarded, as before.
        length = {}
        with open('./data/nr40/len.txt') as f:
            for line in f.read().split('\n')[:-1]:
                filename, l_ = line.split(':')
                length[filename] = int(l_)

        # len_sep_files[n] collects the names whose recorded length is n.
        len_sep_files = [[] for _ in range(2000)]
        lengths = []
        for filename in self.file_list:
            n_res = length[filename]
            lengths.append(n_res)
            len_sep_files[n_res].append(filename)

        self.batchs = rep_utils.group_files(len_sep_files, lengths,
                                            aa_batchsize)
    def __init__(self, path, length_file, seq_file, aa_batchsize=1250):
        """Index the files under ``path`` by length and load the sequences.

        Args:
            path: Directory whose entries become ``self.file_list``; also
                stored as ``self.input_path``.
            length_file: Text file of ``<name>:<length>`` records, one per
                line.
            seq_file: Passed to ``rep_utils.read_fasta``; the result is kept
                in ``self.seq_dict``.
            aa_batchsize: Forwarded unchanged to ``rep_utils.group_files``.
        """
        self.input_path = path
        self.file_list = os.listdir(path)

        # The element after the final newline is discarded.
        with open(length_file) as handle:
            records = handle.read().split('\n')[:-1]
        length_of = {}
        for record in records:
            name, value = record.split(':')
            length_of[name] = int(value)

        # buckets[n] collects the names whose recorded length is n, so the
        # outer list is ordered by ascending sequence length.
        buckets = [[] for _ in range(2000)]
        lengths = []
        for entry in self.file_list:
            # Keep only the part of the entry before its first dot.
            stem = entry.partition('.')[0]
            seq_len = length_of[stem]
            lengths.append(seq_len)
            buckets[seq_len].append(stem)

        self.batchs = rep_utils.group_files(buckets, lengths, aa_batchsize)
        self.seq_dict = rep_utils.read_fasta(seq_file)
    def __init__(self, file_list, aa_batchsize=800):
        """Load a named dataset list and group its files into batches.

        Args:
            file_list: Name of the list to load; the file read is
                ``./data/nr40/dataset_list/<file_list>.txt``, one file name
                per line.
            aa_batchsize: Forwarded unchanged to ``rep_utils.group_files``.

        Raises:
            KeyError: If a listed name has no record in
                ``./data/nr40/len.txt``.
            IndexError: If a recorded length is 2000 or more.
        """
        # Alternative input directories kept for reference:
        # self.input_path = './data/nr40/comp_image_pept_r128'
        # self.input_path = '/share/Data/processed/nr40/comp_image_ca_multiview'
        self.input_path = './data/nr40/comp_image_ca_rescon'
        self.target_path = './data/nr40/tor'
        with open('./data/nr40/dataset_list/%s.txt' % file_list) as f:
            # Drop empty entries: a trailing newline would otherwise leave
            # '' in the list and fail the length lookup below.
            self.file_list = [name for name in f.read().split('\n') if name]

        # Every record of len.txt has the form "<filename>:<length>". The
        # element after the final newline is discarded, as before.
        length = {}
        with open('./data/nr40/len.txt') as f:
            for line in f.read().split('\n')[:-1]:
                filename, l_ = line.split(':')
                length[filename] = int(l_)

        # len_sep_files[n] collects the names whose recorded length is n.
        len_sep_files = [[] for _ in range(2000)]
        lengths = []
        for filename in self.file_list:
            n_res = length[filename]
            lengths.append(n_res)
            len_sep_files[n_res].append(filename)

        self.batchs = rep_utils.group_files(len_sep_files, lengths, aa_batchsize)
    def __init__(self, path, aa_batchsize=60,
                 list_path='/home/caiyi/data/rocklin/src/struct_ssm_test/set_full.txt',
                 length_file='/home/caiyi/data/rocklin/src/len1.txt'):
        """Load a file list and group its entries into batches by length.

        Args:
            path: Stored as ``self.input_path``.
            aa_batchsize: Forwarded unchanged to ``rep_utils.group_files``.
            list_path: Text file with one file name per line. Defaults to
                the location that was previously hard-coded.
            length_file: Text file of ``<name>:<length>`` records, one per
                line. Defaults to the location that was previously
                hard-coded.

        Raises:
            KeyError: If a listed name (minus its last four characters) has
                no record in ``length_file``.
            IndexError: If a recorded length is 2000 or more.
        """
        self.input_path = path
        # In both files the element after the final newline is discarded.
        with open(list_path) as f:
            lines = f.read().split('\n')
            self.file_list = lines[:-1]
        length = {}
        with open(length_file) as f:
            lines = f.read().split('\n')
            for line in lines[:-1]:
                filename, l_ = line.split(':')
                length[filename] = int(l_)

        # len_seq_files[n] collects the names whose recorded length is n, so
        # the outer list is ordered by ascending sequence length.
        len_seq_files = [[] for _ in range(2000)]
        lengths = []
        for filename in self.file_list:
            # Strip the last four characters (presumably a three-letter
            # extension plus the dot; verify against the list file).
            filename = filename[:-4]
            n_res = length[filename]
            lengths.append(n_res)
            len_seq_files[n_res].append(filename)

        self.batchs = rep_utils.group_files(len_seq_files, lengths,
                                            aa_batchsize)