def process(filename):
    """Build an ATree from the FASTA file *filename*, persist it, and sync it.

    Every record returned by ``SeqIO.parse(filename, "fasta")`` is
    concatenated into a single string, which is handed to
    ``break_sequence(..., 8)``; each item it yields is fed to
    ``ATree.process_subsequence``.  The tree is then dumped next to the input
    file (``<filename>_TREE``), pickled under ``GenomeDataset/Processing/``
    and that directory is pushed to the remote host with ``rsync -az``.

    Returns the length of the concatenated sequence string.
    """
    print("Processing " + filename)

    extracted = [entry.seq for entry in SeqIO.parse(filename, "fasta")]
    print("Sequences Extracted!")

    sequence_record = ''.join(map(str, extracted))
    total_length = len(sequence_record)

    atree = ATree()
    print(str(total_length) + "-->" + str(atree))
    for chunk in break_sequence(sequence_record, 8):
        atree.process_subsequence(chunk)

    atree.dump_to_file(filename + "_TREE")

    # The pickle path and the rsync source below are relative, so the
    # working directory is echoed before they are used.
    print("Ensuring correct File System Navigation: " + os.getcwd())
    pickle_target = "GenomeDataset/Processing/" + os.path.basename(filename) + "_pTREE"
    atree.pickle_into_file(pickle_target)

    rsync_command = [
        "rsync",
        "-az",
        "GenomeDataset/Processing/",
        "[email protected]:~/Documents/master-GSAFv2/gsaf-2.0/GenomeDataset/Processing/",
    ]
    subprocess.call(rsync_command)

    return total_length
def process(filename):
    """Turn the FASTA file *filename* into a persisted, synced ATree.

    Steps, in order:

    1. Read all records with ``SeqIO.parse(filename, "fasta")`` and join
       their sequences into one string.
    2. Feed every item yielded by ``break_sequence(<string>, 8)`` to a fresh
       ``ATree`` via ``process_subsequence``.
    3. Dump the tree to ``<filename>_TREE`` and pickle it to
       ``GenomeDataset/Processing/<basename>_pTREE``.
    4. Run ``rsync -az`` to copy ``GenomeDataset/Processing/`` to the remote
       host.

    Returns the number of characters in the joined sequence string.
    """
    print("Processing " + filename)

    sequences = [record.seq for record in SeqIO.parse(filename, "fasta")]
    print("Sequences Extracted!")
    sequence_record = ''.join([str(seq) for seq in sequences])
    record_length = len(sequence_record)

    atree = ATree()
    print(str(record_length) + "-->" + str(atree))
    for piece in break_sequence(sequence_record, 8):
        atree.process_subsequence(piece)
    atree.dump_to_file(filename + "_TREE")

    # Paths used from here on are relative to the current directory.
    print("Ensuring correct File System Navigation: " + os.getcwd())
    processing_dir = "GenomeDataset/Processing/"
    atree.pickle_into_file(processing_dir + os.path.basename(filename) + "_pTREE")

    remote_dir = "[email protected]:~/Documents/master-GSAFv2/gsaf-2.0/GenomeDataset/Processing/"
    subprocess.call(["rsync", "-az", processing_dir, remote_dir])

    return record_length
def __init__(self):
    """Set every analysis attribute to its empty / not-yet-computed state."""
    # Eigenvalue storage and the flag saying whether it has been filled.
    self.eigen_exists = False
    self.eigen_values = []

    # statistical_inferences stores mean, mode, median, min, max and range;
    # all slots start at 0 and the flag starts False.
    self.statistical_inferences_exist = False
    self.statistical_inferences = {
        "mean": 0,
        "mode": 0,
        'median': 0,
        'min': 0,
        'max': 0,
        'range': 0,
    }

    self.has_been_analyzed = False
    self.gmap_exists = False
    # ``np.int`` was only an alias of the builtin ``int``; it was deprecated
    # in NumPy 1.20 and removed in 1.24, so the builtin is used directly.
    self.gmap = np.zeros((340, 340), dtype=int)
    self.lookup_table = []
    self.new_tree = ATree()
def __init__(self):
    """Initialise all analysis state to "nothing computed yet"."""
    # Eigenvalues and their "has been computed" flag.
    self.eigen_exists = False
    self.eigen_values = []

    # statistical_inferences holds mean, mode, median, min, max and range.
    self.statistical_inferences_exist = False
    self.statistical_inferences = {"mean": 0, "mode": 0, "median": 0, "min": 0, "max": 0, "range": 0}

    self.has_been_analyzed = False
    self.gmap_exists = False
    # Builtin ``int`` replaces ``np.int``: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24, and both name the same dtype.
    self.gmap = np.zeros((340, 340), dtype=int)
    self.lookup_table = []
    self.new_tree = ATree()
class Chromosome:
    """Chromosome class to handle analysis modules.

    An instance loads a pickled chromosome tree, fills the 340x340 integer
    matrix ``gmap`` with pattern counts taken from that tree, and derives
    eigenvalues and summary statistics from the matrix.
    """

    # Every string over A/C/G/T of length 1..8, sorted.
    _pattern_list = sorted(
        "".join(x) for i in range(1, 9) for x in product(*['ACGT'] * i)
    )
    # Every string over A/C/G/T of length 1..4, sorted.  Its 340 entries
    # (4 + 16 + 64 + 256) label the rows and columns of ``gmap``.
    _pattern_list_half = sorted(
        "".join(x) for i in range(1, 5) for x in product(*['ACGT'] * i)
    )

    def __init__(self):
        """Set every analysis attribute to its empty / not-yet-computed state."""
        self.eigen_exists = False
        self.eigen_values = []
        # statistical_inferences stores mean, mode, median, min, max and range
        self.statistical_inferences_exist = False
        self.statistical_inferences = {
            "mean": 0,
            "mode": 0,
            'median': 0,
            'min': 0,
            'max': 0,
            'range': 0,
        }
        self.has_been_analyzed = False
        self.gmap_exists = False
        # ``np.int`` was an alias of the builtin ``int``; it was deprecated in
        # NumPy 1.20 and removed in 1.24, so the builtin is used directly.
        self.gmap = np.zeros((340, 340), dtype=int)
        self.lookup_table = []
        self.new_tree = ATree()

    def load_chromosome_tree(self, filename):
        """Replace ``self.new_tree`` with the object unpickled from *filename*."""
        # NOTE(security): pickle.load executes arbitrary code embedded in the
        # file; only load tree files produced by a trusted pipeline.
        with open(filename, 'rb') as in_fh:
            self.new_tree = pickle.load(in_fh)
        print(filename + "Chromosome Tree Loaded!")

    def analyze(self, chromosome_file):
        """Run the load -> map -> eigenvalue -> store pipeline once.

        Nothing beyond the banner is done when ``has_been_analyzed`` is
        already set.  The result is pickled to
        ``<chromosome_file>_pChromosome``.
        """
        print("Analyzing " + chromosome_file)
        if self.has_been_analyzed:
            return

        self.load_chromosome_tree(chromosome_file)

        if self.gmap_exists:
            print('Chromosome is mapped!')
        else:
            self.map(chromosome_file)

        if self.eigen_exists:
            print('EV(s) exist!')
        else:
            self.calculate_eigen_values()

        # Dump pickled data
        self.store_to_file(chromosome_file + "_pChromosome", filealso=False)
        # Update has_been_analyzed attribute
        self.has_been_analyzed = True

    def map(self, chromosome_file):
        """Fill ``gmap`` with counts from ``self.new_tree``.

        A pattern is cut into a prefix and a suffix; the prefix selects the
        row, the suffix the column, and the cell receives
        ``self.new_tree.count(pattern)``.  Length-2 patterns are cut once in
        the middle; patterns of length 3..6 are cut at ``len // 2`` and at
        ``len // 2 + 1``.  The matrix is also written as text to the file
        ``op`` in the current directory.

        *chromosome_file* is used for progress messages only.
        """
        print("Performing Mapping Logic on: " + chromosome_file)

        # One dict lookup per access instead of a linear list.index() scan.
        half_index = {
            pattern: position
            for position, pattern in enumerate(self._pattern_list_half)
        }

        for pattern in self._pattern_list:
            length = len(pattern)
            if length < 2:
                continue
            middle = length // 2
            if length == 2:
                cuts = (middle,)
            elif 1 + length / 2 <= 4:
                cuts = (middle, middle + 1)
            else:
                # NOTE(review): this guard skips lengths 7 and 8 entirely,
                # so those patterns never reach gmap - confirm intended.
                continue

            occurrences = self.new_tree.count(pattern)
            for cut in cuts:
                row = half_index[pattern[:cut]]
                column = half_index[pattern[cut:]]
                self.gmap[row, column] = occurrences

        self.gmap_exists = True
        print(str(chromosome_file) + " mapped successfully!")

        with open('op', 'w') as outfile:
            outfile.write(str(self.gmap))
        # Sanity check for the mapping: gmap[A][AA] and gmap[AA][A] are both
        # written from the count of "AAA", so they must be equal.

    def store_to_file(self, filename, filealso=False):
        """Pickle this object to *filename*.

        When *filealso* is true, ``gmap`` is additionally written with
        ``ndarray.tofile`` to ``<filename>MAP_MATRIX_``.
        """
        with open(filename, 'wb') as picklefile:
            pickle.dump(self, picklefile)
        print("Stored to " + filename + " successfully!")
        if filealso:
            self.gmap.tofile(filename + "MAP_MATRIX_")

    def calculate_eigen_values(self):
        """Compute the eigenvalues of ``gmap`` and its summary statistics.

        The eigenvalues are stored in ``self.eigen_values`` (and
        ``eigen_exists`` is set) so they survive pickling and so ``analyze``
        does not recompute them; they are also returned.  ``mean``,
        ``median``, ``min``, ``max`` and ``range`` of the matrix are written
        into ``statistical_inferences``; ``mode`` is not computed here.
        """
        # Eigen Value Calculation and Output Code
        eigen_values, _ = LA.eig(self.gmap)
        self.eigen_values = eigen_values
        self.eigen_exists = True

        # Populate statistical inferences
        stats = self.statistical_inferences
        stats['mean'] = np.mean(self.gmap)
        stats['median'] = np.median(self.gmap)
        stats['min'] = np.amin(self.gmap)
        stats['max'] = np.amax(self.gmap)
        stats['range'] = np.ptp(self.gmap)
        self.statistical_inferences_exist = True

        return eigen_values
class Chromosome:
    """Chromosome class to handle analysis modules.

    An instance loads a pickled chromosome tree, fills the 340x340 integer
    matrix ``gmap`` with pattern counts taken from that tree, and derives
    eigenvalues and summary statistics from the matrix.
    """

    # Every string over A/C/G/T of length 1..8, sorted.
    _pattern_list = sorted("".join(x) for i in range(1, 9) for x in product(*["ACGT"] * i))
    # Every string over A/C/G/T of length 1..4, sorted.  Its 340 entries
    # (4 + 16 + 64 + 256) label the rows and columns of ``gmap``.
    _pattern_list_half = sorted("".join(x) for i in range(1, 5) for x in product(*["ACGT"] * i))

    def __init__(self):
        """Initialise all analysis state to "nothing computed yet"."""
        self.eigen_exists = False
        self.eigen_values = []
        # statistical_inferences holds mean, mode, median, min, max and range
        self.statistical_inferences_exist = False
        self.statistical_inferences = {"mean": 0, "mode": 0, "median": 0, "min": 0, "max": 0, "range": 0}
        self.has_been_analyzed = False
        self.gmap_exists = False
        # Builtin ``int`` replaces ``np.int``: the alias was deprecated in
        # NumPy 1.20 and removed in 1.24, and both name the same dtype.
        self.gmap = np.zeros((340, 340), dtype=int)
        self.lookup_table = []
        self.new_tree = ATree()

    def load_chromosome_tree(self, filename):
        """Unpickle *filename* and store the result in ``self.new_tree``."""
        # NOTE(security): pickle.load can execute arbitrary code from the
        # file; only open tree files that come from a trusted source.
        with open(filename, "rb") as in_fh:
            self.new_tree = pickle.load(in_fh)
        print(filename + "Chromosome Tree Loaded!")

    def analyze(self, chromosome_file):
        """Load, map, compute eigenvalues and store - at most once.

        When ``has_been_analyzed`` is already true only the banner is
        printed.  Otherwise the tree is loaded from *chromosome_file*, the
        mapping and eigenvalue steps run unless their flags say they are
        done, and the object is pickled to ``<chromosome_file>_pChromosome``.
        """
        print("Analyzing " + chromosome_file)
        if self.has_been_analyzed:
            return

        self.load_chromosome_tree(chromosome_file)

        if not self.gmap_exists:
            self.map(chromosome_file)
        else:
            print("Chromosome is mapped!")

        if not self.eigen_exists:
            self.calculate_eigen_values()
        else:
            print("EV(s) exist!")

        # Dump pickled data
        self.store_to_file(chromosome_file + "_pChromosome", filealso=False)
        # Update has_been_analyzed attribute
        self.has_been_analyzed = True

    def map(self, chromosome_file):
        """Populate ``gmap`` from ``self.new_tree.count``.

        Each pattern is split into prefix and suffix; ``gmap[prefix, suffix]``
        (indices taken from ``_pattern_list_half``) is set to the pattern's
        count.  Length-2 patterns use the single middle split; lengths 3..6
        use the splits at ``len // 2`` and ``len // 2 + 1``.  Afterwards the
        matrix is written as text to the file ``op``.

        *chromosome_file* only appears in the progress messages.
        """
        print("Performing Mapping Logic on: " + chromosome_file)

        # Precompute pattern -> index once; list.index() would rescan the
        # 340-entry list on every access.
        position_of = {pat: idx for idx, pat in enumerate(self._pattern_list_half)}

        for pattern in self._pattern_list:
            size = len(pattern)
            if size < 2:
                continue
            half = size // 2
            if size == 2:
                split_points = (half,)
            elif 1 + size / 2 <= 4:
                split_points = (half, half + 1)
            else:
                # NOTE(review): lengths 7 and 8 fail this guard and are never
                # written to gmap - confirm that is the intended behaviour.
                continue

            hits = self.new_tree.count(pattern)
            for point in split_points:
                self.gmap[position_of[pattern[:point]], position_of[pattern[point:]]] = hits

        self.gmap_exists = True
        print(str(chromosome_file) + " mapped successfully!")

        with open("op", "w") as outfile:
            outfile.write(str(self.gmap))
        # Sanity check for the mapping: gmap[A][AA] and gmap[AA][A] are both
        # written from the count of "AAA", so they must be equal.

    def store_to_file(self, filename, filealso=False):
        """Pickle this object to *filename*.

        With ``filealso=True`` the raw ``gmap`` array is also written via
        ``ndarray.tofile`` to ``<filename>MAP_MATRIX_``.
        """
        with open(filename, "wb") as picklefile:
            pickle.dump(self, picklefile)
        print("Stored to " + filename + " successfully!")
        if filealso:
            self.gmap.tofile(filename + "MAP_MATRIX_")

    def calculate_eigen_values(self):
        """Return the eigenvalues of ``gmap`` and record matrix statistics.

        The eigenvalues are kept in ``self.eigen_values`` and ``eigen_exists``
        is set, so the values are part of the pickled object and ``analyze``
        can skip recomputation.  ``mean``, ``median``, ``min``, ``max`` and
        ``range`` of ``gmap`` go into ``statistical_inferences``; ``mode`` is
        left untouched.
        """
        # Eigen Value Calculation and Output Code
        eigen_values, _ = LA.eig(self.gmap)
        self.eigen_values = eigen_values
        self.eigen_exists = True

        # Populate statistical inferences
        self.statistical_inferences["mean"] = np.mean(self.gmap)
        self.statistical_inferences["median"] = np.median(self.gmap)
        self.statistical_inferences["min"] = np.amin(self.gmap)
        self.statistical_inferences["max"] = np.amax(self.gmap)
        self.statistical_inferences["range"] = np.ptp(self.gmap)
        self.statistical_inferences_exist = True

        return eigen_values