Ejemplo n.º 1
0
    'organism': 'TcruziCLBrenerEsmeraldo-like'
}
# Settings for the 'TcruziCLBrenerNon-Esmeraldo-like' organism: the genome
# FASTA, the annotated-CDS FASTA (stored under 'regions_filename') and the
# organism label. Both paths are relative, so they resolve against the
# current working directory.
non_emeraldo = {
    'genome_filename': (
        'TTDB/TriTrypDB-46_TcruziCLBrenerNon-Esmeraldo-like_Genome.fasta'
    ),
    'regions_filename': (
        'TTDB/TriTrypDB-46_TcruziCLBrenerNon-Esmeraldo-like_AnnotatedCDSs.fasta'
    ),
    'organism': 'TcruziCLBrenerNon-Esmeraldo-like',
}

# Dataset processed by this script. `non_emeraldo` is the alternative
# configuration defined above; assign it here to switch organisms.
organism = emeraldo_like

if __name__ == "__main__":
    # Load the genome and the annotated regions from their FASTA files.
    genome = FASTA(organism['genome_filename'])
    genome.load()

    regions = FASTA(organism['regions_filename'])
    regions.load()

    # Open the SQLite database file.
    sqlite = sqlite3.connect(SQLite_DB)

    # Create the per-organism MFA-Seq output folder. makedirs(exist_ok=True)
    # replaces the isdir()/mkdir() pair: there is no window between the
    # check and the creation, and a missing MFASeq_folder parent is created
    # instead of raising FileNotFoundError.
    Organism_MFASeq_folder = f"{MFASeq_folder}/MFA-Seq_{organism['organism']}"
    os.makedirs(Organism_MFASeq_folder, exist_ok=True)

    # Create MFASeq Files
    for chromosome_id in genome.data.keys():
        Chromosome_file = f"{Organism_MFASeq_folder}/{chromosome_id}.txt"
                    action='store_true',
                    help="Use base pairs instead of genome counts")

# Parse the command line. Options used verbatim are read first; every other
# option falls back to a built-in default whenever its parsed value is falsy
# (not supplied, empty, or 0).
args = parser.parse_args()

simulation_folder = args.simulation
base_pairs = args.basepairs

protein_fasta = (
    args.fasta
    if args.fasta
    else '/home/seijihariki/Documents/TCC/TTDB/TriTrypDB-46_TcruziCLBrenerEsmeraldo-like_AnnotatedTranscripts.fasta'
)
search = args.search if args.search else 'DGF-1'
simulation_cnt = args.count if args.count else 50
chromosomes_cnt = args.chromosomes if args.chromosomes else 41

# Read the annotated transcripts from the selected FASTA file.
print('Loading annotations:')
transcripts = FASTA(protein_fasta)
transcripts.load()

# Maps chromosome name -> list; one entry per chromosome is created by the
# detection loop that follows.
collisions = {}

print('Detecting collisions:')
# Visit chromosomes by index; the generated names are 1-based
# ("TcChr1-S", "TcChr2-S", ...).
for chromosome in range(chromosomes_cnt):
    chromosome_name = f"TcChr{chromosome + 1}-S"
    # The list is created once per chromosome, before the simulation loop.
    collisions[chromosome_name] = []

    for simulation in range(simulation_cnt):
        # The path is built by plain string concatenation, so
        # simulation_folder must already end with a path separator.
        with open(
                f"{simulation_folder}simulation_{simulation}/{chromosome_name}.cseq"
        ) as times:
            # NOTE(review): -2 looks like a "no interval seen yet" sentinel;
            # confirm against the remainder of this loop body.
            start, end = -2, -2
            current_location = 0