# Take the file names from the error CSV.
list_files = get_files_from_error_csv('error_files_list.csv')
# Uncomment this line for the first insertion
# list_files = get_list_ids_files_in_path('/RAST/SPREADSHEET/')
list_files = reversed(list_files)
list_files_error, list_files_done = [], []
print(list_files)

for file_name in list_files:
    print(file_name)

    # Load taxonomy. The last three characters of the file name are
    # replaced by the suffix of the sibling file (presumably the name ends
    # in "xls" -- TODO confirm against the SPREADSHEET directory).
    name_stem = file_name[:-3]
    path_file_genBank = f"{cwd}/RAST/GEN_BANK/{name_stem}gbk"
    gen_bank_obj = Genbank_proteic_RAST(path_file_genBank)
    taxo = gen_bank_obj.get_taxonomy_array()
    print(gen_bank_obj.get_family())
    print(file_name)

    # Continue only for a 7-entry taxonomy array, or for any file whose
    # name mentions staphylococcus_aureus.
    has_seven_taxa = len(taxo) == 7
    if has_seven_taxa or 'staphylococcus_aureus' in file_name.lower():
        # Test proteins from file
        path_file_xls = f"{cwd}/RAST/SPREADSHEET/{file_name}"
        path_file_contig = f"{cwd}/RAST/CONTIGS/{name_stem}contigs.fa"
        xls_obj = Xls_gen_bank(path_file_xls)
        value = check_file_exits(path_file_xls)
        contig_file_exist = check_file_exits(path_file_contig)
# Take the file names from the error CSV.
list_files = get_files_from_error_csv('error_files_list.csv')
# Uncomment this line for the first insertion
# list_files = get_list_ids_files_in_path('/RAST/SPREADSHEET/')
list_files = reversed(list_files)
list_files_error = []
list_files_done = []
print(list_files)
for file_name in list_files:
    print(file_name)
    # Load taxonomy: the GenBank file shares the spreadsheet's name with its
    # last three characters replaced by 'gbk'.
    path_file_genBank = cwd + '/RAST/GEN_BANK/' + file_name[:-3] + 'gbk'
    gen_bank_obj = Genbank_proteic_RAST(path_file_genBank)
    taxo = gen_bank_obj.get_taxonomy_array()
    print(gen_bank_obj.get_family())
    print(file_name)
    file_name_lower = file_name.lower()
    # Continue only for a 7-entry taxonomy array, or for any file whose
    # name mentions streptococcus_oralis.
    if len(taxo) == 7 or 'streptococcus_oralis' in file_name_lower:
        # Default to None so the objects exist even when the branch below
        # is skipped.
        family_obj = None
        genus_obj = None
        specie_obj = None
        strain_obj = None
        # A file counts as a phage file when its name carries EITHER marker.
        # The previous test, ('_phi' not in name or 'phage' not in name),
        # was true unless both markers were present, so a name with only
        # one marker was still handled as a non-phage file.
        is_phage_file = '_phi' in file_name_lower or 'phage' in file_name_lower
        if not is_phage_file and len(taxo) == 7:
            family_obj = Family(designation=gen_bank_obj.get_family())
            genus_obj = Genus(designation=gen_bank_obj.get_genus())
            specie_obj = Specie(designation=gen_bank_obj.get_specie())
            strain_obj = Strain(designation=gen_bank_obj.get_strain())
print(xls_obj.get_number_of_proteins())
print("end")

############# TEST CONTIGS FASTA FILES #############
# Every backslash in the path literals is written as '\\'. The earlier
# spelling began with '\R', an invalid escape sequence that Python only
# tolerates with a warning; the resulting runtime string is unchanged.
fasta_contigs_file = Fasta_contigs_RAST(
    cwd + '\\RAST\\CONTIGS\\525717-Escherichia_coli_CFT079.contigs.fa'
)
# Fetch the sequence of the contig whose id is the first entry of `listas`.
sequence_contig_nucleic = fasta_contigs_file.get_contig_seq_by_id(listas[0])

############# TEST CONTIGS genbank FILES #############
genbank_file = Genbank_proteic_RAST(
    cwd + '\\RAST\\GEN_BANK\\525717-Escherichia_coli_CFT079.gbk'
)
# Print the organism definition and the taxonomy array for inspection.
aaaa = genbank_file.get_definition_of_the_organism()
print(aaaa)
www = genbank_file.get_taxonomy_array()
print(www)
# Show the container type of the parsed data and its first key.
print(type(genbank_file.data_gen_bank))
print(list(genbank_file.data_gen_bank.keys())[0])
qty_contig = genbank_file.get_number_of_contigs()
print(genbank_file.get_family())