'organism': 'TcruziCLBrenerEsmeraldo-like' } non_emeraldo = { 'genome_filename': 'TTDB/TriTrypDB-46_TcruziCLBrenerNon-Esmeraldo-like_Genome.fasta', 'regions_filename': 'TTDB/TriTrypDB-46_TcruziCLBrenerNon-Esmeraldo-like_AnnotatedCDSs.fasta', 'organism': 'TcruziCLBrenerNon-Esmeraldo-like' } organism = emeraldo_like if __name__ == "__main__": # Load FASTA files genome = FASTA(organism['genome_filename']) genome.load() regions = FASTA(organism['regions_filename']) regions.load() # Load database file sqlite = sqlite3.connect(SQLite_DB) # Create MFASeq Folder Organism_MFASeq_folder = f"{MFASeq_folder}/MFA-Seq_{organism['organism']}" if not os.path.isdir(Organism_MFASeq_folder): os.mkdir(Organism_MFASeq_folder) # Create MFASeq Files for chromosome_id in genome.data.keys(): Chromosome_file = f"{Organism_MFASeq_folder}/{chromosome_id}.txt"
action='store_true', help="Use base pairs instead of genome counts") args = parser.parse_args() protein_fasta = args.fasta or '/home/seijihariki/Documents/TCC/TTDB/TriTrypDB-46_TcruziCLBrenerEsmeraldo-like_AnnotatedTranscripts.fasta' simulation_folder = args.simulation search = args.search or 'DGF-1' base_pairs = args.basepairs simulation_cnt = args.count or 50 chromosomes_cnt = args.chromosomes or 41 print('Loading annotations:') transcripts = FASTA(protein_fasta) transcripts.load() collisions = {} print('Detecting collisions:') for chromosome in range(chromosomes_cnt): chromosome_name = f"TcChr{chromosome + 1}-S" collisions[chromosome_name] = [] for simulation in range(simulation_cnt): with open( f"{simulation_folder}simulation_{simulation}/{chromosome_name}.cseq" ) as times: start, end = -2, -2 current_location = 0