def __init__(self, fasta_filename, kabat_filename, alphabet, count, is_random=False):
    """Collect up to ``count`` records, with optional per-record markup.

    Records are obtained from
    ``svm_tools.get_raw_data(fasta_filename, is_random)`` and stored in
    ``self.data`` in the order they are produced.

    When ``kabat_filename`` is not ``None``, ``self.load_kabat`` is called
    with it, ``self.markup`` starts as an empty list, and each accepted
    record gets ``self.regions[record.id]`` appended to ``self.markup`` at
    the same index it has in ``self.data``.  When ``kabat_filename`` is
    ``None``, ``self.markup`` is ``None``.

    NOTE(review): ``self.regions`` is presumably populated by
    ``load_kabat`` and ``is_marked()`` presumably reports whether
    ``self.markup`` is not ``None``; neither is defined in this block,
    so confirm against the rest of the class.

    Args:
        fasta_filename: Passed through to ``svm_tools.get_raw_data``.
        kabat_filename: Markup file handed to ``self.load_kabat``, or
            ``None`` to build a dataset without markup.
        alphabet: Stored unchanged as ``self.alphabet``.
        count: Maximum number of records to keep.  A non-positive value
            yields an empty dataset.
        is_random: Passed through to ``svm_tools.get_raw_data``.
    """
    self.count = count
    self.alphabet = alphabet
    if kabat_filename is None:
        self.markup = None
    else:
        # load_kabat runs before markup is reset, as in the original order.
        self.load_kabat(kabat_filename)
        self.markup = []

    raw_data = svm_tools.get_raw_data(fasta_filename, is_random)
    self.data = []

    # Previously the limit was only tested after a record had been handled,
    # so count <= 0 still stored one record.  Nothing requested -> nothing kept.
    if count <= 0:
        return

    for record in raw_data:
        # On a marked dataset, skip records that have no markup entry.
        if self.is_marked() and record.id not in self.regions:
            continue
        self.data.append(record)
        if self.is_marked():
            self.markup.append(self.regions[record.id])
        if len(self.data) >= count:
            break
def __init__(self, filename_list, gene_type_list, alphabet, count_per_file, is_random):
    """Build a dataset from several files, optionally labelled per file.

    For every file, ``svm_tools.get_raw_data(filename, is_random)`` is
    called and the slice ``[:count_per_file]`` of its result is appended to
    ``self.data``.  When ``gene_type_list`` is given, ``self.markup``
    receives one ``self.type_to_number[gene_type]`` entry per record
    actually added, so ``self.markup`` and ``self.data`` stay index-aligned.
    When ``gene_type_list`` is ``None``, ``self.markup`` is ``None`` and the
    files are loaded without labels.

    NOTE(review): ``is_marked()`` presumably reports whether
    ``self.markup`` is not ``None``; it is not defined in this block, so
    confirm against the rest of the class.

    Args:
        filename_list: Iterable of file names to load.
        gene_type_list: Iterable of gene types paired positionally with
            ``filename_list`` (extra items on either side are ignored, as
            with ``zip``), or ``None`` for an unlabelled dataset.
        alphabet: Stored unchanged as ``self.alphabet``.
        count_per_file: Upper slice bound applied to each file's records.
        is_random: Passed through to ``svm_tools.get_raw_data``.
    """
    self.alphabet = alphabet
    self.data = []
    self.markup = None if gene_type_list is None else []

    if gene_type_list is None:
        # zip(filename_list, None) raises TypeError; pair each file with a
        # placeholder so the unlabelled case still loads its records.
        labelled_files = ((filename, None) for filename in filename_list)
    else:
        labelled_files = zip(filename_list, gene_type_list)

    for filename, gene_type in labelled_files:
        raw_data = svm_tools.get_raw_data(filename, is_random)
        selected = raw_data[:count_per_file]
        self.data.extend(selected)
        if self.is_marked():
            # Label only the records that were really added: a file may hold
            # fewer than count_per_file records.
            label = self.type_to_number[gene_type]
            self.markup.extend([label] * len(selected))