# Lookup table: genome identifier (the file name minus its last four
# characters) -> organism designation used for the values built below.
# NOTE(review): the designation ends with a trailing space, which is carried
# into gi_name / acc_value / phage_designation -- confirm this is intended.
dict_values = {'287.5076': 'Pseudomonas_aeruginosa_4053 '}

for file_name in list_files:
    # Only files whose identifier is registered in dict_values are handled.
    genome_key = file_name[:-4]
    if genome_key in dict_values:
        organism_designation = dict_values[genome_key]

        # Both input paths are derived from the file name with its last
        # three characters replaced by the target suffix.
        file_stem = file_name[:-3]
        path_file_xls = cwd + '/RAST/Xls/' + file_stem + 'xls'
        path_file_contig = cwd + '/RAST/CONTIG/' + file_stem + 'contigs.fa'

        # Stop immediately when either the spreadsheet or the contig file
        # is not found.
        xls_file_exists = check_file_exits(path_file_xls)
        contig_file_exist = check_file_exits(path_file_contig)
        assert xls_file_exists == True and contig_file_exist == True, 'A file is missing'

        # Open the spreadsheet and the contig FASTA file.
        xls_obj = Xls_gen_bank(path_file_xls, sheet_name='Sheet1')
        contig_fasta_file_patric_obj = Fasta_contigs_RAST(path_file=path_file_contig)
        qty_cntg = contig_fasta_file_patric_obj.get_qty_of_contigs()
        list_cnt = contig_fasta_file_patric_obj.create_contigs_from_file()

        # Values derived from the organism designation; gi_name and
        # acc_value are deliberately the same "Greg_"-prefixed string.
        gi_name = "Greg_" + organism_designation
        acc_value = "Greg_" + organism_designation
        # NOTE(review): 2 and 3 look like database ids -- confirm their meaning.
        person_responsible = 2
        source_data = 3
        phage_designation = organism_designation
# Link the strain to its species id, then create it.
strain_obj.fk_specie = specie_obj.id_specie
strain_obj.create_strain()

# Test proteins from file: build the spreadsheet and contig paths from the
# current file name.
path_file_xls = cwd + '/RAST/SPREADSHEET/' + file_name
path_file_contig = cwd + '/RAST/CONTIGS/' + file_name[:-3] + 'contigs.fa'

xls_obj = Xls_gen_bank(path_file_xls)
# The spreadsheet check result is stored but not consulted in this section.
value = check_file_exits(path_file_xls)
contig_file_exist = check_file_exits(path_file_contig)

# Test contigs from file: read them from the FASTA file when it exists.
if contig_file_exist is True:
    fasta_contig_obj = Fasta_contigs_RAST(path_file_contig)
    qtity_cntg = fasta_contig_obj.get_qty_of_contigs()
    list_cnt = fasta_contig_obj.create_contigs_from_file()
    print("Hello")
else:
    # No contig file: build one Contig per contig id listed in the
    # spreadsheet, each with an empty sequence.
    # Contig signature reminder kept from the original notes:
    # id_contig = -1, id_contig_db_outside = -1, head = "", sequence = " ",
    # fk_id_whole_genome = -1
    list_contig_name = xls_obj.get_contigs_id_sorted()
    list_cnt = [
        Contig(head=contig_name, sequence='')
        for contig_name in list_contig_name
    ]
    # NOTE(review): the original layout was lost; this print is assumed to
    # belong to the fallback branch -- confirm against the original file.
    print(list_cnt)

# Load every protein described in the spreadsheet and record how many there are.
list_of_proteins = xls_obj.create_proteins_from_file()
qty_proteins_loaded = len(list_of_proteins)
# Test get all proteins from a given contig id.
list_prot = xls_obj.get_proteins_ids_by_contig_id("out_1")
print(type(list_prot))
print(list_prot[0])

# Test get information from a given protein id.
www = xls_obj.get_information_line_by_protein_id(list_prot[0])
print(xls_obj.get_number_of_proteins())
print("end")

############# TEST CONTIGS FASTA FILES #############
# The leading backslash is now escaped: '\R' is an invalid escape sequence
# (deprecated, and a SyntaxWarning on recent Python versions). The resulting
# runtime string is unchanged.
# NOTE(review): these are Windows-style separators, while other paths in this
# script use '/' -- confirm which platform this section targets.
fasta_contigs_file = Fasta_contigs_RAST(
    cwd + '\\RAST\\CONTIGS\\525717-Escherichia_coli_CFT079.contigs.fa'
)
# NOTE(review): `listas` is not defined in this section; it is presumably a
# list of contig ids created earlier in the file -- verify.
sequence_contig_nucleic = fasta_contigs_file.get_contig_seq_by_id(listas[0])

############# TEST CONTIGS GENBANK FILES #############
genbank_file = Genbank_proteic_RAST(
    cwd + '\\RAST\\GEN_BANK\\525717-Escherichia_coli_CFT079.gbk'
)
aaaa = genbank_file.get_definition_of_the_organism()
print(aaaa)
www = genbank_file.get_taxonomy_array()
print(www)