def functional_test_2(self):
    '''
    Cluster the 5 genomes using gANI
    '''
    genomes = self.genomes
    wd_loc = self.wd_loc
    s_wd_loc = self.s_wd_loc

    # Make sure gANI is installed; skip (rather than fail) when it's absent.
    # Idiom fix: 'is None' / 'not works' instead of '== None' / '== False'
    loc, works = find_program('ANIcalculator')
    if loc is None or not works:
        print('Cannot locate the program {0}- skipping related tests'
              .format('ANIcalculator (for gANI)'))
        return

    # Run clustering with gANI as the secondary algorithm
    args = argumentParser.parse_args(['cluster', wd_loc, '--S_algorithm',
                                      'gANI', '-g'] + genomes)
    controller = Controller()
    controller.parseArguments(args)

    # Verify
    Swd = WorkDirectory(s_wd_loc)
    wd = WorkDirectory(wd_loc)

    # Confirm Cdb.csv is correct; the comparison_algorithm column is
    # expected to differ, so drop it from both sides before comparing
    db1 = Swd.get_db('Cdb')
    del db1['comparison_algorithm']
    db2 = wd.get_db('Cdb')
    del db2['comparison_algorithm']
    assert compare_dfs(db1, db2), "{0} is not the same!".format('Cdb')
def functional_test_1(self):
    '''
    Call filter on 'Enterococcus_faecalis_T2.fna'

    Should call both prodigal and checkM

    (Docstring fixed: it previously named 'Escherichia_coli_Sakai.fna',
    but the genome actually filtered below is Enterococcus_faecalis_T2.fna)
    '''
    genomes = self.genomes
    wd_loc = self.wd_loc

    # make sure calling it on the right genome
    genome = [g for g in genomes if g.endswith('Enterococcus_faecalis_T2.fna')]
    assert len(genome) == 1
    genome = genome[0]

    args = argumentParser.parse_args(['filter', wd_loc, '-g', genome]
                                     + ['--checkM_method', 'taxonomy_wf'])
    controller = Controller()
    controller.parseArguments(args)

    # Confirm Chdb.csv is correct
    wd = drep.WorkDirectory.WorkDirectory(wd_loc)
    chdb = wd.get_db('Chdb')
    assert chdb['Completeness'].tolist()[0] == 98.28

    # Confirm genome is in Bdb.csv
    Gdb = wd.get_db('genomeInfo')
    assert Gdb['completeness'].tolist()[0] == 98.28
def unit_test_1(self):
    '''
    Ensure choose can handle when Chdb is not present, running checkM
    automatically
    '''
    # Delete Chdb so 'choose' is forced to regenerate it
    wd_loc = self.working_wd_loc
    os.remove(wd_loc + '/data_tables/Chdb.csv')

    # Point Bdb's location column at the genome files on this machine
    genomes = load_test_genomes()
    base_to_loc = {os.path.basename(g): g for g in genomes}
    Bdb = pd.read_csv(wd_loc + '/data_tables/Bdb.csv')
    Bdb['location'] = Bdb['genome'].map(base_to_loc)
    Bdb.to_csv(wd_loc + '/data_tables/Bdb.csv', index=False)

    # Run choose - this should re-run checkM and re-generate Chdb
    args = argumentParser.parse_args(
        ['choose', wd_loc, '--checkM_method', 'taxonomy_wf'])
    controller = Controller()
    controller.parseArguments(args)

    # The regenerated tables must match the solution work directory
    Swd = WorkDirectory(self.s_wd_loc)
    wd = WorkDirectory(self.working_wd_loc)
    for name in ['Chdb', 'genomeInformation']:
        expected = Swd.get_db(name)
        observed = wd.get_db(name)
        assert compare_dfs(expected, observed), \
            "{0} is not the same!".format(name)
def unit_test_2(self):
    '''
    Try out the --skipCheckM argument for choose
    '''
    # Delete the tables that choose would normally need / produce
    wd_loc = self.working_wd_loc
    os.remove(wd_loc + '/data_tables/Chdb.csv')
    os.remove(wd_loc + '/data_tables/Sdb.csv')
    os.remove(wd_loc + '/data_tables/Wdb.csv')

    # Run choose with --noQualityFiltering (skips checkM entirely)
    args = argumentParser.parse_args(
        ['choose', wd_loc, '--noQualityFiltering'])
    controller = Controller()
    controller.parseArguments(args)

    Swd = WorkDirectory(self.s_wd_loc)
    wd = WorkDirectory(self.working_wd_loc)

    # Without quality filtering these tables must differ from the solution
    for db in ['Sdb', 'Wdb', 'genomeInformation']:
        db1 = Swd.get_db(db)
        db2 = wd.get_db(db)
        assert not compare_dfs(db1, db2), "{0} is the same!".format(db)

    # Scores must still land in the expected range.
    # Chained comparison replaces the original '(s > 0) & (s < 5)',
    # which relied on bitwise '&' between booleans.
    sdb = wd.get_db('Sdb')
    for s in sdb['score'].tolist():
        assert 0 < s < 5
def taxTest1(self):
    '''
    Check the taxonomy call for max method
    '''
    genomes = self.genomes
    wd_loc = self.wd_loc
    swd_loc = self.s_wd_loc

    # Call the command
    args = argumentParser.parse_args(['bonus', wd_loc, '-g'] + genomes
        + ['--run_tax', '--cent_index',
           '/home/mattolm/download/centrifuge/indices/b+h+v',
           '--tax_method', 'max'])
    controller = Controller()
    controller.parseArguments(args)

    # Verify
    Swd = WorkDirectory(swd_loc)
    wd = WorkDirectory(wd_loc)

    # Bdb must match exactly (locations are machine-specific, drop them)
    tdbS = Swd.get_db('Bdb')
    tdb = wd.get_db('Bdb')
    del tdbS['location']
    del tdb['location']
    assert compare_dfs(tdb, tdbS), "{0} is not the same!".format('Bdb')

    # Tdb differences are reported but deliberately tolerated
    # (may be due to centrifuge index drift)
    tdbS = Swd.get_db('Tdb')
    tdb = wd.get_db('Tdb')
    if not compare_dfs(tdb, tdbS):
        print("{0} is not the same! May be due to centrifuge index issues".
              format('Tdb'))
        # pd.Panel was removed in pandas 1.0 and would crash here;
        # show the two frames stacked with pd.concat instead
        print(pd.concat({'df1': tdbS, 'df2': tdb}, axis=0))
def unit_tests_4(self):
    '''
    Test changing cluster -pa
    '''
    # Complete run with a non-default primary ANI threshold
    controller = Controller()
    controller.parseArguments(argumentParser.parse_args(
        ['cluster', self.working_wd_loc, '-g'] + self.genomes
        + ['-pa', '0.10']))

    # Verify
    Swd = WorkDirectory(self.s_wd_loc)
    wd = WorkDirectory(self.working_wd_loc)

    # Changing -pa alters the clustering, so these must NOT match
    # the stored solution
    for name in ['Ndb', 'Cdb']:
        solution = Swd.get_db(name)
        result = wd.get_db(name)
        assert not compare_dfs(solution, result), \
            "{0} is the same! (and shouldn't be)".format(name)
def taxTest2(self):
    '''
    Check the taxonomy call for percent method
    '''
    genomes = self.genomes
    wd_loc = self.wd_loc
    swd_loc = self.s_wd_loc

    # Run 'bonus' in taxonomy mode using the percent method
    cli = ['bonus', wd_loc, '-g'] + genomes
    cli += ['--run_tax', '--cent_index',
            '/home/mattolm/download/centrifuge/indices/b+h+v',
            '--tax_method', 'percent']
    controller = Controller()
    controller.parseArguments(argumentParser.parse_args(cli))

    # Compare results against the stored solutions
    Swd = WorkDirectory(swd_loc)
    wd = WorkDirectory(wd_loc)

    # Bdb: drop the machine-specific location column before comparing
    want = Swd.get_db('BdbP')
    got = wd.get_db('Bdb')
    del want['location']
    del got['location']
    assert compare_dfs(got, want), "{0} is not the same!".format('Bdb')

    # Tdb must match exactly
    want = Swd.get_db('TdbP')
    got = wd.get_db('Tdb')
    assert compare_dfs(got, want), "{0} is not the same!".format('Tdb')
def plot_6_test_1(self):
    '''
    Test plot 6 with different things missing

    The run-then-verify sequence was duplicated verbatim; it is now
    factored into two local helpers.
    '''
    fig_dir = os.path.join(self.working_wd_loc, 'figures', '')

    def _run_plot6():
        # Run 'analyze' producing only plot 6
        args = argumentParser.parse_args(
            ['analyze', self.working_wd_loc, '-pl', '6'])
        controller = Controller()
        controller.parseArguments(args)

    def _assert_winning_fig():
        # Exactly Winning_genomes.pdf should exist, and it must be non-empty
        figs = [os.path.basename(f) for f in glob.glob(fig_dir + '*')]
        FIGS = ['Winning_genomes.pdf']
        assert sorted(figs) == sorted(FIGS)
        for fig in glob.glob(fig_dir + '*'):
            assert os.path.getsize(fig) > 0

    # Test with everything there
    _run_plot6()
    _assert_winning_fig()

    # Test with Widb.csv removed (and the old figures cleared out first)
    db_loc = os.path.join(self.working_wd_loc, 'data_tables', 'Widb.csv')
    os.remove(db_loc)
    for f in glob.glob(fig_dir + '*'):
        os.remove(f)
    _run_plot6()
    _assert_winning_fig()
def functional_test_2(self):
    '''
    Ensure analyze crashes gracefully
    '''
    wd_loc = self.working_wd_loc
    wd = drep.WorkDirectory.WorkDirectory(wd_loc)

    # Remove the core data tables so analyze has nothing to work with
    for table in ('Mdb.csv', 'Cdb.csv', 'Bdb.csv'):
        os.remove(os.path.join(wd.get_dir('data_tables'), table))

    # This should complete without raising despite the missing tables
    args = argumentParser.parse_args(
        ['analyze', self.working_wd_loc, '-pl', 'a'])
    controller = Controller()
    controller.parseArguments(args)
def test_dereplicate_8(self):
    '''
    Test greedy clustering with some primary clusters only having a single member
    '''
    # The large genome set only exists on the development machine
    if len(self.large_genome_set) == 0:
        print("*** THIS TEST ONLY WORKS ON MO'S DEVELOPMENT MACHINE ***")
        return

    # Two genomes from the large collection (indices 0 and 20);
    # presumably distant enough to form singleton primary clusters - TODO confirm
    genomes = [self.large_genome_set[0], self.large_genome_set[20]]
    wd_loc = self.wd_loc
    wd_loc2 = self.wd_loc2

    # Get greedy results
    args = argumentParser.parse_args([
        'compare', wd_loc2, '--S_algorithm', 'fastANI',
        '--multiround_primary_clustering', '--primary_chunksize', '50',
        '--greedy_secondary_clustering', '-sa', '0.95', '-pa', '0.99', '-g'
    ] + genomes)
    Controller().parseArguments(args)
    wd = WorkDirectory(wd_loc2)
    CSdb = wd.get_db('Cdb')

    # Run normal (same settings, without greedy secondary clustering)
    args = argumentParser.parse_args([
        'compare', wd_loc, '--S_algorithm', 'fastANI',
        '--multiround_primary_clustering', '--primary_chunksize', '50',
        '-sa', '0.95', '-pa', '0.99', '-g'
    ] + genomes)
    Controller().parseArguments(args)

    # Verify they're the same
    wd = WorkDirectory(wd_loc)
    Cdb = wd.get_db('Cdb')
    assert len(CSdb) == len(Cdb)
    # Cluster sizes must agree for both cluster levels; cluster *names*
    # are only compared for the primary level (greedy secondary clustering
    # may label secondary clusters differently)
    for c in ['primary_cluster', 'secondary_cluster']:
        assert set(CSdb[c].value_counts().to_dict().values()) == set(
            Cdb[c].value_counts().to_dict().values()), c
        if c != 'secondary_cluster':
            assert set(CSdb[c].value_counts().to_dict().keys()) == set(
                Cdb[c].value_counts().to_dict().keys()
            )  #, [set(CSdb[c].value_counts().to_dict().keys()), set(Cdb[c].value_counts().to_dict().keys())]
    # Same genomes in both outputs; greedy output carries exactly one
    # extra column
    assert set(CSdb['genome'].tolist()) == set(Cdb['genome'].tolist())
    assert set(CSdb.columns) - set(Cdb.columns) == set(
        ['greedy_representative'])
def functional_test_1(self):
    '''
    Ensure analyze produces all plots
    '''
    # Run analyze requesting every plot
    args = argumentParser.parse_args(
        ['analyze', self.working_wd_loc, '-pl', 'a'])
    controller = Controller()
    controller.parseArguments(args)

    expected = ['Cluster_scoring.pdf', 'Clustering_scatterplots.pdf',
                'Primary_clustering_dendrogram.pdf',
                'Secondary_clustering_dendrograms.pdf',
                'Winning_genomes.pdf', 'Secondary_clustering_MDS.pdf']
    fig_dir = os.path.join(self.working_wd_loc, 'figures', '')
    produced = [os.path.basename(f) for f in glob.glob(fig_dir + '*')]

    # Every expected figure exists, nothing extra, and none are empty
    assert sorted(produced) == sorted(expected)
    for fig in glob.glob(fig_dir + '*'):
        assert os.path.getsize(fig) > 0
def unit_tests_5(self):
    '''
    Test changing cluster --S_algorithm gANI
    '''
    # Complete clustering run with gANI as the secondary algorithm
    controller = Controller()
    controller.parseArguments(argumentParser.parse_args(
        ['cluster', self.working_wd_loc, '-g'] + self.genomes
        + ['--S_algorithm', 'gANI']))

    # Verify: both Cdb and Mdb should match the stored solutions exactly
    Swd = WorkDirectory(self.s_wd_loc)
    wd = WorkDirectory(self.working_wd_loc)
    for name in ['Cdb', 'Mdb']:
        solution = Swd.get_db(name)
        result = wd.get_db(name)
        assert compare_dfs(solution, result), \
            "{0} is not the same!".format(name)
def unit_tests_3(self):
    '''
    Test cluster with --SkipMash
    '''
    # Complete run with the Mash (primary) step skipped
    controller = Controller()
    controller.parseArguments(argumentParser.parse_args(
        ['cluster', self.working_wd_loc, '-g'] + self.genomes
        + ['--SkipMash']))

    # Verify
    Swd = WorkDirectory(self.s_wd_loc)
    wd = WorkDirectory(self.working_wd_loc)

    # Skipping Mash changes the results, so none of these should match
    # the stored solutions
    for name in ['Cdb', 'Ndb', 'Mdb']:
        solution = Swd.get_db(name)
        result = wd.get_db(name)
        assert not compare_dfs(solution, result), \
            "{0} is the same! (and shouldn't be)".format(name)
def functional_test_2(self):
    # Verify the solutions directory starts in a sane state
    genomes = self.genomes
    wd_loc = self.wd_loc
    s_wd_loc = self.s_wd_loc
    sanity_check(WorkDirectory(s_wd_loc))

    # Run a plain 'compare' over the genomes
    controller = Controller()
    controller.parseArguments(argumentParser.parse_args(
        ['compare', wd_loc, '-g'] + genomes))

    # Verify against the solution, skipping tables compare doesn't produce
    s_wd = WorkDirectory(s_wd_loc)
    wd = WorkDirectory(wd_loc)
    ensure_identicle(s_wd, wd, skip=['Bdb', 'Chdb', 'Sdb', 'Wdb', 'Widb',
                                     'genomeInformation', 'Mdb'])

    # Perform sanity check to make sure the solutions directory isn't
    # being overwritten
    sanity_check(s_wd)
def functional_test_1(self):
    # Verify the solutions directory starts in a sane state
    genomes = self.genomes
    wd_loc = self.wd_loc
    s_wd_loc = self.s_wd_loc
    sanity_check(WorkDirectory(s_wd_loc))

    # Full dereplicate run
    cli = ['dereplicate', wd_loc, '-g'] + genomes
    cli += ['--checkM_method', 'taxonomy_wf']
    controller = Controller()
    controller.parseArguments(argumentParser.parse_args(cli))

    # Verify against the solution work directory
    s_wd = WorkDirectory(s_wd_loc)
    wd = WorkDirectory(wd_loc)
    ensure_identicle(s_wd, wd, skip=['Bdb', 'Mdb'])

    # Perform sanity check to make sure the solutions directory isn't
    # being overwritten
    sanity_check(s_wd)
def test_dereplicate_5(self):
    '''
    Test greedy clustering
    '''
    genomes = self.large_genome_set[:10]
    wd_loc = self.wd_loc
    wd_loc2 = self.wd_loc2
    # The large genome set only exists on the development machine
    if len(genomes) == 0:
        print("*** THIS TEST ONLY WORKS ON MO'S DEVELOPMENT MACHINE ***")
        return

    # Greedy run
    greedy_cli = ['compare', wd_loc2, '--S_algorithm', 'fastANI',
                  '--SkipMash', '--greedy_secondary_clustering',
                  '-sa', '0.95', '-g'] + genomes
    Controller().parseArguments(argumentParser.parse_args(greedy_cli))
    CSdb = WorkDirectory(wd_loc2).get_db('Cdb')

    # Normal run (same settings, no greedy secondary clustering)
    normal_cli = ['compare', wd_loc, '--S_algorithm', 'fastANI',
                  '--SkipMash', '-sa', '0.95', '-g'] + genomes
    Controller().parseArguments(argumentParser.parse_args(normal_cli))
    Cdb = WorkDirectory(wd_loc).get_db('Cdb')

    # Same row count, identical cluster names and sizes at both levels
    assert len(CSdb) == len(Cdb)
    for col in ['primary_cluster', 'secondary_cluster']:
        greedy_counts = CSdb[col].value_counts().to_dict()
        normal_counts = Cdb[col].value_counts().to_dict()
        assert set(greedy_counts.values()) == set(normal_counts.values()), col
        assert set(greedy_counts.keys()) == set(normal_counts.keys()), col

    # Same genome membership; greedy output has exactly one extra column
    assert set(CSdb['genome'].tolist()) == set(Cdb['genome'].tolist())
    assert set(CSdb.columns) - set(Cdb.columns) == set(
        ['greedy_representative'])
def test_dereplicate_4(self):
    '''
    Test the ability of primary clustering to take a large genome set
    and break it into chunks
    '''
    genomes = self.large_genome_set
    wd_loc = self.wd_loc
    wd_loc2 = self.wd_loc2
    # The large genome set only exists on the development machine
    if len(genomes) == 0:
        print("*** THIS TEST ONLY WORKS ON MO'S DEVELOPMENT MACHINE ***")
        return

    # Get normal results (no multiround chunking)
    args = argumentParser.parse_args([
        'compare', wd_loc2, '--S_algorithm', 'fastANI', '--SkipSecondary',
        '--primary_chunksize', '50', '-g'
    ] + genomes)
    Controller().parseArguments(args)
    wd = WorkDirectory(wd_loc2)
    CSdb = wd.get_db('Cdb')

    # Run with chunking (multiround primary clustering, chunks of 50)
    args = argumentParser.parse_args([
        'compare', wd_loc, '--S_algorithm', 'fastANI', '--SkipSecondary',
        '--multiround_primary_clustering', '--primary_chunksize', '50', '-g'
    ] + genomes)
    Controller().parseArguments(args)

    # Verify they're the same
    wd = WorkDirectory(wd_loc)
    Cdb = wd.get_db('Cdb')
    assert len(CSdb) == len(Cdb)
    # Cluster names and cluster sizes must agree between the two modes
    for c in ['primary_cluster', 'secondary_cluster']:
        assert set(CSdb[c].value_counts().to_dict().values()) == set(
            Cdb[c].value_counts().to_dict().values())
        assert set(CSdb[c].value_counts().to_dict().keys()) == set(
            Cdb[c].value_counts().to_dict().keys())
    # Same genome membership; the chunked (multiround) output carries
    # extra bookkeeping columns (note 'primary_representitive' is spelled
    # this way in the data itself)
    assert set(CSdb['genome'].tolist()) == set(Cdb['genome'].tolist())
    assert set(Cdb.columns) - set(CSdb.columns) == set(
        ['length', 'subcluster', 'primary_representitive'])
def test_dereplicate_6(self):
    '''
    Test zipped genomes
    '''
    genomes = self.zipped_genomes
    wd_loc = self.wd_loc
    s_wd_loc = self.s_wd_loc
    test_utils.sanity_check(WorkDirectory(s_wd_loc))

    # Compare the zipped genome files using fastANI
    controller = Controller()
    controller.parseArguments(argumentParser.parse_args(
        ['compare', wd_loc, '--S_algorithm', 'fastANI', '-g'] + genomes))

    # ANI values must be valid fractions and not all identical
    anis = WorkDirectory(wd_loc).get_db('Ndb')['ani'].tolist()
    assert max(anis) <= 1
    assert min(anis) >= 0
    assert len(set(anis)) > 1
def functional_test_1(self):
    '''
    Cluster the 5 genomes using default settings
    '''
    genomes = self.genomes
    wd_loc = self.wd_loc
    s_wd_loc = self.s_wd_loc

    # Run cluster with all defaults
    controller = Controller()
    controller.parseArguments(argumentParser.parse_args(
        ['cluster', wd_loc, '-g'] + genomes))

    # Confirm Cdb.csv matches the stored solution
    Swd = WorkDirectory(s_wd_loc)
    wd = WorkDirectory(wd_loc)
    solution = Swd.get_db('Cdb')
    result = wd.get_db('Cdb')
    assert compare_dfs(solution, result), "{0} is not the same!".format('Cdb')
def test_taxonomy_4(self):
    '''
    Try actually running centrifuge without prodigal done
    '''
    # Guard clause replaces the original if/else nesting; the condition
    # is kept exactly as it was. Skip (don't fail) when centrifuge is
    # not installed.
    loc, works = drep.d_bonus.find_program('centrifuge')
    if works == False:
        print('Centrifuge not installed- skipping tests')
        return

    genomes = self.genomes
    wd_loc = self.wd_loc
    swd_loc = self.s_wd_loc

    # Remove previous data run so both prodigal and centrifuge must run
    shutil.rmtree(os.path.join(self.wd_loc, 'data', 'centrifuge'))
    shutil.rmtree(os.path.join(self.wd_loc, 'data', 'prodigal'))

    # Call the command
    args = argumentParser.parse_args(['bonus', wd_loc, '-g'] + genomes
        + ['--run_tax', '--cent_index',
           '/home/mattolm/download/centrifuge/indices/b+h+v',
           '--tax_method', 'percent'])
    controller = Controller()
    controller.parseArguments(args)

    # Verify against the stored solution tables
    Swd = WorkDirectory(swd_loc)
    wd = WorkDirectory(wd_loc)

    # Bdb: drop the machine-specific location column before comparing
    tdbS = Swd.get_db('BdbP')
    tdb = wd.get_db('Bdb')
    del tdbS['location']
    del tdb['location']
    assert test_utils.compare_dfs(
        tdb, tdbS), "{0} is not the same!".format('Bdb')

    # Tdb must match exactly
    tdbS = Swd.get_db('TdbP')
    tdb = wd.get_db('Tdb')
    assert test_utils.compare_dfs(
        tdb, tdbS), "{0} is not the same!".format('Tdb')
def skipsecondary_test(self):
    '''
    Cluster with --SkipSecondary and confirm no secondary comparisons
    are made.
    '''
    genomes = self.genomes
    wd_loc = self.wd_loc
    s_wd_loc = self.s_wd_loc

    # Cluster with the secondary comparison step skipped
    args = argumentParser.parse_args(['cluster', wd_loc, '-g'] + genomes
                                     + ['--SkipSecondary'])
    controller = Controller()
    controller.parseArguments(args)

    # Verify. NOTE: the Mdb comparison against the solution directory
    # (s_wd_loc) was previously disabled (commented out); the dead
    # fetches of Mdb from both directories have been removed with it.

    # Confirm Ndb.csv doesn't exist (no secondary comparisons were run)
    wd = WorkDirectory(wd_loc)
    db2 = wd.get_db('Ndb')
    assert db2.empty, 'Ndb is not empty'
def test_dereplicate_1(self):
    # Verify the solutions directory starts in a sane state
    genomes = self.genomes
    wd_loc = self.wd_loc
    s_wd_loc = self.s_wd_loc
    test_utils.sanity_check(WorkDirectory(s_wd_loc))

    # Full dereplicate run with ANImf and debug output
    cli = ['dereplicate', wd_loc, '-g'] + genomes
    cli += ['--checkM_method', 'taxonomy_wf', '--debug',
            '--S_algorithm', 'ANImf']
    controller = Controller()
    controller.parseArguments(argumentParser.parse_args(cli))

    # Verify against the solution, skipping the listed tables
    s_wd = WorkDirectory(s_wd_loc)
    wd = WorkDirectory(wd_loc)
    test_utils.ensure_identicle(
        s_wd, wd,
        skip=['Bdb', 'Mdb', 'Sdb', 'Wdb', 'genomeInformation', 'Widb'])

    # Perform sanity check to make sure the solutions directory isn't
    # being overwritten
    test_utils.sanity_check(s_wd)
def test_dereplicate_3(self):
    '''
    Use goANI
    '''
    genomes = self.genomes
    wd_loc = self.wd_loc
    s_wd_loc = self.s_wd_loc
    test_utils.sanity_check(WorkDirectory(s_wd_loc))

    # Run compare with goANI as the secondary algorithm
    controller = Controller()
    controller.parseArguments(argumentParser.parse_args(
        ['compare', wd_loc, '--S_algorithm', 'goANI', '-g'] + genomes))

    # The run must have produced at least one comparison in Ndb
    s_wd = WorkDirectory(s_wd_loc)
    wd = WorkDirectory(wd_loc)
    Ndb = wd.get_db('Ndb')
    assert len(Ndb) > 0

    # Perform sanity check to make sure the solutions directory isn't
    # being overwritten
    test_utils.sanity_check(s_wd)
def functional_test_3(self):
    '''
    Cluster the 5 genomes using ANImf
    '''
    genomes = self.genomes
    wd_loc = self.wd_loc
    s_wd_loc = self.s_wd_loc

    # Cluster with ANImf as the secondary algorithm
    controller = Controller()
    controller.parseArguments(argumentParser.parse_args(
        ['cluster', wd_loc, '--S_algorithm', 'ANImf', '-g'] + genomes))

    # Confirm Cdb.csv matches the solution; the comparison_algorithm
    # column is expected to differ, so drop it from both sides first
    Swd = WorkDirectory(s_wd_loc)
    wd = WorkDirectory(wd_loc)
    solution = Swd.get_db('Cdb')
    del solution['comparison_algorithm']
    result = wd.get_db('Cdb')
    del result['comparison_algorithm']
    assert compare_dfs(solution, result), "{0} is not the same!".format('Cdb')
def unit_tests_1(self):
    '''
    Test a normal run of cluster
    '''
    # normal complete run
    args = argumentParser.parse_args(
        ['cluster', self.working_wd_loc, '-g'] + self.genomes)
    controller = Controller()
    controller.parseArguments(args)

    # Verify
    Swd = WorkDirectory(self.s_wd_loc)
    wd = WorkDirectory(self.working_wd_loc)

    # Confirm the following are correct:
    for db in ['Cdb', 'Ndb']:
        db1 = Swd.get_db(db)
        db2 = wd.get_db(db)
        if not compare_dfs(db1, db2):
            # On mismatch, print a focused diff of the ANI values before
            # failing. pd.Panel was removed in pandas 1.0 and would crash
            # here; pd.concat gives an equivalent side-by-side view.
            db1 = db1[['reference', 'querry', 'ani']]
            db1.rename(columns={'ani': 'ani1'}, inplace=True)
            db2 = db2[['reference', 'querry', 'ani']]
            db2.rename(columns={'ani': 'ani2'}, inplace=True)
            db1.sort_values(['reference', 'querry'], inplace=True)
            db2.sort_values(['reference', 'querry'], inplace=True)
            print("{0} is not the same!".format(db))
            print(pd.concat({'df1': db1, 'df2': db2}, axis=0))
            print(pd.merge(db1, db2, on=['reference', 'querry']))
        assert compare_dfs(db1, db2), "{0} is not the same!".format(db)
def unit_tests_1(self):
    '''
    Test a normal run of cluster
    '''
    # normal complete run
    args = argumentParser.parse_args(
        ['cluster', self.working_wd_loc, '-g'] + self.genomes)
    controller = Controller()
    controller.parseArguments(args)

    # Verify
    Swd = WorkDirectory(self.s_wd_loc)
    wd = WorkDirectory(self.working_wd_loc)

    # Confirm the following are correct:
    for db in ['Cdb', 'Ndb']:
        db1 = Swd.get_db(db)
        db2 = wd.get_db(db)

        # get rid of some precision on the ANI
        if db == 'Ndb':
            db1['ani'] = [float("{0:.4f}".format(x)) for x in db1['ani']]
            db2['ani'] = [float("{0:.4f}".format(x)) for x in db2['ani']]

        if not compare_dfs(db1, db2):
            # On mismatch, narrow to the ANI columns and print diagnostics
            # before failing. The large body of commented-out debugging
            # code that used to live here has been deleted; pd.Panel was
            # removed in pandas 1.0, so pd.concat is used for the
            # side-by-side view.
            db1 = db1[['reference', 'querry', 'ani']]
            db2 = db2[['reference', 'querry', 'ani']]
            print("now?")
            print(compare_dfs(db1, db2))
            db1 = db1.sort_values(['reference', 'querry'])
            db2 = db2.sort_values(['reference', 'querry'])
            print(db1)
            print(db2)
            print('panel:')
            print(pd.concat({'df1': db1, 'df2': db2}, axis=0))

        assert compare_dfs(db1, db2), "{0} is not the same!".format(db)