def process(filename):
    """Build an ATree from a FASTA file, persist it, and sync it remotely.

    Every record in ``filename`` is parsed as FASTA and the sequences are
    concatenated into one string. That string is split by
    ``break_sequence(..., 8)`` and each piece is passed to
    ``ATree.process_subsequence``. The tree is then dumped next to the input
    (``<filename>_TREE``), pickled into ``GenomeDataset/Processing/`` under
    the input's base name with a ``_pTREE`` suffix, and the whole processing
    directory is pushed to the remote host with ``rsync -az``.

    Args:
        filename: Path of the FASTA file to process. It is concatenated with
            string literals, so it must be a ``str``.

    Returns:
        The length of the concatenated sequence string.
    """
    print("Processing " + filename)

    extracted = [entry.seq for entry in SeqIO.parse(filename, "fasta")]
    print("Sequences Extracted!")

    # All records are fused into a single string before chunking.
    genome = "".join(map(str, extracted))
    genome_length = len(genome)

    tree = ATree()
    print(f"{genome_length}-->{tree!s}")

    for chunk in break_sequence(genome, 8):
        tree.process_subsequence(chunk)

    dump_target = filename + "_TREE"
    tree.dump_to_file(dump_target)

    # The pickle path below is relative, so the working directory matters.
    print("Ensuring correct File System Navigation: " + os.getcwd())
    local_dir = "GenomeDataset/Processing/"
    tree.pickle_into_file(local_dir + os.path.basename(filename) + "_pTREE")

    # NOTE(review): the remote user/host literal looks like a redacted
    # address -- confirm the intended destination. rsync's exit status is
    # not inspected here.
    remote_dir = "[email protected]:~/Documents/master-GSAFv2/gsaf-2.0/GenomeDataset/Processing/"
    subprocess.call(["rsync", "-az", local_dir, remote_dir])

    return genome_length
def __init__(self):
    """Initialise the instance with empty, not-yet-computed analysis state.

    Every ``*_exists`` / ``has_been_analyzed`` flag starts as ``False`` and
    every container starts empty or zero-filled.
    """
    self.eigen_exists = False
    self.eigen_values = []

    # Summary statistics, keyed by mean, mode, median, min, max and range;
    # all start at 0 and statistical_inferences_exist starts False.
    self.statistical_inferences_exist = False
    self.statistical_inferences = {
        "mean": 0,
        "mode": 0,
        "median": 0,
        "min": 0,
        "max": 0,
        "range": 0,
    }

    self.has_been_analyzed = False

    self.gmap_exists = False
    # 340 x 340 zero-filled integer matrix. The builtin ``int`` is used as
    # the dtype because the ``np.int`` alias (which was just ``int``) was
    # removed in NumPy 1.24 and raises AttributeError there.
    self.gmap = np.zeros((340, 340), dtype=int)

    self.lookup_table = []
    self.new_tree = ATree()