# File names to process, read from 'error_files_list.csv'
# (presumably the files that failed in a previous run -- confirm).
list_files = get_files_from_error_csv('error_files_list.csv')

#Uncomment this line for the first insertion
#list_files = get_list_ids_files_in_path('/RAST/SPREADSHEET/')

# Materialise the reversed order as a real list: a bare reversed() object
# prints as an opaque iterator and can only be consumed once.
list_files = list(reversed(list_files))

# Bookkeeping lists; they start empty here and are not filled in the
# visible part of the script.
list_files_error = []
list_files_done = []

# Debug output: show the file names in the order they will be processed.
print(list_files)
# For each file name: load the matching GenBank record, print its family,
# and -- when the taxonomy check passes -- build the spreadsheet and contig
# paths and check that both files exist.
for file_name in list_files:
    print(file_name)
    #Load taxonomy
    #try:
    # file_name[:-3] drops the last three characters before 'gbk' is appended.
    # NOTE(review): presumably relies on a three-letter extension such as
    # 'xls' -- confirm against the actual file names.
    path_file_genBank = cwd + '/RAST/GEN_BANK/' + file_name[:-3] + 'gbk'
    gen_bank_obj = Genbank_proteic_RAST(path_file_genBank)
    taxo = gen_bank_obj.get_taxonomy_array()
    print(gen_bank_obj.get_family())
    print(file_name)
    # Continue only when the taxonomy array has exactly seven entries, or
    # when the file name contains 'staphylococcus_aureus' (case-insensitive).
    if len(taxo) == 7 or 'staphylococcus_aureus' in file_name.lower():

        #Test proteins from file
        path_file_xls = cwd + '/RAST/SPREADSHEET/' + file_name
        # Same three-character trim as above, giving '<stem>.contigs.fa'
        # when file_name ends in a dot plus three letters.
        path_file_contig = cwd + '/RAST/CONTIGS/' + file_name[:-3] + 'contigs.fa'

        xls_obj = Xls_gen_bank(path_file_xls)

        # NOTE(review): Xls_gen_bank is constructed from this path before the
        # existence check runs; if the constructor reads the file, a missing
        # spreadsheet would fail there first -- confirm the intended order.
        value = check_file_exits(path_file_xls)

        contig_file_exist = check_file_exits(path_file_contig)

        #Test contigs from file
        if contig_file_exist is True:
Ejemplo n.º 2
0
# File names to process, read from 'error_files_list.csv'
# (presumably the files that failed in a previous run -- confirm).
list_files = get_files_from_error_csv('error_files_list.csv')

#Uncomment this line for the first insertion
#list_files = get_list_ids_files_in_path('/RAST/SPREADSHEET/')

# Materialise the reversed order as a real list: a bare reversed() object
# prints as an opaque iterator and can only be consumed once.
list_files = list(reversed(list_files))

# Bookkeeping lists; they start empty here and are not filled in the
# visible part of the script.
list_files_error = []
list_files_done = []

# Debug output: show the file names in the order they will be processed.
print(list_files)
# For each file name: load the matching GenBank record, print its family,
# and -- when the taxonomy check passes -- build the Family / Genus /
# Specie / Strain objects describing the organism.
for file_name in list_files:
    print(file_name)
    #Load taxonomy
    #try:
    # file_name[:-3] drops the last three characters before 'gbk' is appended.
    # NOTE(review): presumably relies on a three-letter extension such as
    # 'xls' -- confirm against the actual file names.
    path_file_genBank = cwd + '/RAST/GEN_BANK/' + file_name[:-3] + 'gbk'
    gen_bank_obj = Genbank_proteic_RAST(path_file_genBank)
    taxo = gen_bank_obj.get_taxonomy_array()
    print(gen_bank_obj.get_family())
    print(file_name)
    # Continue only when the taxonomy array has exactly seven entries, or
    # when the file name contains 'streptococcus_oralis' (case-insensitive).
    if len(taxo) == 7 or 'streptococcus_oralis' in file_name.lower():

        # Default to None so every name is bound even when neither branch
        # below assigns it.
        family_obj = None
        genus_obj = None
        specie_obj = None
        strain_obj = None
        # Take the taxonomy from the GenBank record when it has seven entries.
        # NOTE(review): "not A or not B" is False only when the name contains
        # BOTH '_phi' and 'phage'. If the intent is to skip any phage file,
        # this should probably be `and` -- confirm before changing.
        if ('_phi' not in file_name.lower()
                or 'phage' not in file_name.lower()) and len(taxo) == 7:
            family_obj = Family(designation=gen_bank_obj.get_family())
            genus_obj = Genus(designation=gen_bank_obj.get_genus())
            specie_obj = Specie(designation=gen_bank_obj.get_specie())
            strain_obj = Strain(designation=gen_bank_obj.get_strain())
        # Files named after Streptococcus oralis get a hard-coded family,
        # overriding whatever was set above.
        if 'streptococcus_oralis' in file_name.lower():
            family_obj = Family(designation='Streptococcaceae')
Ejemplo n.º 3
0
# Manual smoke test: load one GenBank file and print what the
# Genbank_proteic_RAST accessors return for it.
#
# Every backslash in the path is escaped. The original literal started with
# '\R', which is an invalid escape sequence that only worked because Python
# keeps unknown escapes verbatim (and warns about it). The resulting path
# string is unchanged.
genbank_file = Genbank_proteic_RAST(cwd + '\\RAST\\GEN_BANK\\525717-Escherichia_coli_CFT079.gbk')


# Organism definition and taxonomy array as reported by the GenBank object.
aaaa = genbank_file.get_definition_of_the_organism()
print(aaaa)
www = genbank_file.get_taxonomy_array()
print(www)

# Inspect the underlying data container: its type and its first key.
print(type(genbank_file.data_gen_bank))

print(list(genbank_file.data_gen_bank.keys())[0])

# Number of contigs; not printed in the visible part of the script.
qty_contig = genbank_file.get_number_of_contigs()

# Individual taxonomy levels.
print(genbank_file.get_family())

print(genbank_file.get_genus())

print(genbank_file.get_specie())

print(genbank_file.get_strain())

print("--------")



##### Start test for one organism
# Record the wall-clock start time before the per-contig loop that follows.
start_time = time.time()
print("Start organism")
for contig in listas: