def test_file_invalid(self, invalid_json_alignment):
    """Verify that reading an invalid JSON alignment file raises an error.

    Args:
        invalid_json_alignment (pytest.fixture): A parameterized pytest
            fixture providing invalid json alignment filenames.
    """
    # The reader must reject the malformed file with AlignmentIOError.
    with open(invalid_json_alignment) as json_flo, \
            pytest.raises(dr.AlignmentIOError):
        dr.read_json_alignment_flo(json_flo)
def test_stringio_valid(self, valid_json_alignment):
    """Tests that valid JSON alignment data loads from a StringIO object.

    Args:
        valid_json_alignment (pytest.fixture): A parameterized pytest
            fixture providing valid json alignment filenames.
    """
    with open(valid_json_alignment) as in_json:
        # StringIO(initial_value) starts positioned at offset 0, so the
        # separate write() / seek(0) sequence is unnecessary.
        json_stringio = StringIO(in_json.read())
    # Only the reader call is wrapped: the original `except Exception:
    # print(...); assert False` also caught this test's own assertion
    # failures and discarded their messages.
    try:
        sequence_list, headers = dr.read_json_alignment_flo(json_stringio)
    except Exception as err:
        # pytest.fail raises an outcome exception that keeps the message
        # in the test report instead of a bare `assert False`.
        pytest.fail('Raised exception: {}'.format(err))
    if headers is not None:
        assert len(headers) > 0
    assert len(sequence_list) > 0
    for seq in sequence_list:
        assert isinstance(seq, Sequence)
        assert isinstance(seq.name, string_formats)
        assert len(seq.cont_values) > 0
def test_stringio_invalid(self, invalid_json_alignment):
    """Verify that invalid alignment data fails when read from StringIO.

    Args:
        invalid_json_alignment (pytest.fixture): A parameterized pytest
            fixture providing invalid json alignment filenames.
    """
    # Load the file contents into an in-memory file-like object; the
    # StringIO constructor leaves the stream positioned at offset 0.
    with open(invalid_json_alignment) as in_json:
        json_flo = StringIO(in_json.read())
    # The reader must reject the malformed data with AlignmentIOError.
    with pytest.raises(dr.AlignmentIOError):
        dr.read_json_alignment_flo(json_flo)
def test_package_invalid(self, invalid_ancestral_distribution_package):
    """Test calculate_ancestral_distributions method with invalid data.

    Args:
        invalid_ancestral_distribution_package (pytest.fixture): A pytest
            fixture that is parametrized to provide invalid ancestral
            distributions, one at a time, so that there are multiple test
            functions defined for each invalid package.

    Raises:
        IOError: When the tree or alignment cannot be loaded for the
            specified file extension.
    """
    # Get the data files; results_filename is unused for invalid packages.
    tree_filename, alignment_filename, results_filename = (
        invalid_ancestral_distribution_package)

    # Resolve the dendropy schema from the tree file extension.
    schema_by_ext = {'.nex': 'nexus', '.xml': 'nexml', '.tre': 'newick'}
    _, tree_ext = os.path.splitext(tree_filename)
    if tree_ext not in schema_by_ext:
        raise IOError(
            'Cannot handle tree with extension: {}'.format(tree_ext))
    tree = dendropy.Tree.get(
        path=tree_filename, schema=schema_by_ext[tree_ext])

    # Map alignment extensions to reader functions; the csv and json
    # readers also return headers.
    reader_by_ext = {
        '.csv': data_readers.read_csv_alignment_flo,
        '.json': data_readers.read_json_alignment_flo,
        '.phylip': data_readers.read_phylip_alignment_flo,
        '.tbl': data_readers.read_table_alignment_flo,
    }
    _, align_ext = os.path.splitext(alignment_filename)
    if align_ext not in reader_by_ext:
        raise IOError('Cannot handle alignments with extension: {}'.format(
            align_ext))
    with open(alignment_filename) as align_file:
        if align_ext in ('.csv', '.json'):
            sequences, headers = reader_by_ext[align_ext](align_file)
        else:
            sequences = reader_by_ext[align_ext](align_file)
    char_mtx = data_readers.get_character_matrix_from_sequences_list(
        sequences)

    # The invalid package must make the analysis raise.
    with pytest.raises(Exception):
        anc_dp.calculate_ancestral_distributions(tree, char_mtx)
def test_file_valid(self, valid_json_alignment):
    """Tests that the valid alignment files do not fail.

    Args:
        valid_json_alignment (pytest.fixture): A parameterized pytest
            fixture providing valid json alignment filenames.
    """
    # Only the reader call is wrapped: the original `except Exception:
    # print(...); assert False` also caught this test's own assertion
    # failures and discarded their messages.
    with open(valid_json_alignment) as in_json:
        try:
            sequence_list, headers = dr.read_json_alignment_flo(in_json)
        except Exception as err:
            # pytest.fail keeps the reader's message in the test report
            # instead of a bare `assert False`.
            pytest.fail('Raised exception: {}'.format(err))
    if headers is not None:
        assert len(headers) > 0
    assert len(sequence_list) > 0
    for seq in sequence_list:
        assert isinstance(seq, Sequence)
        assert isinstance(seq.name, string_formats)
        assert len(seq.cont_values) > 0
# Check that input files exist if not os.path.exists(args.in_tree_filename): raise IOError( 'Input tree {} does not exist'.format(args.in_tree_filename)) if not os.path.exists(args.pam_filename): raise IOError( 'Input data file {} does not exist'.format(args.pam_filename)) # Read data if args.data_format == 'csv': with open(args.pam_filename) as in_file: sequences, headers = data_readers.read_csv_alignment_flo( in_file) elif args.data_format == 'json': with open(args.pam_filename) as in_file: sequences, headers = data_readers.read_json_alignment_flo( in_file) elif args.data_format == 'phylip': with open(args.pam_filename) as in_file: sequences = data_readers.read_phylip_alignment_flo(in_file) headers = None elif args.data_format == 'table': with open(args.pam_filename) as in_file: sequences = data_readers.read_table_alignment_flo(in_file) headers = None else: raise Exception('Unknown data format: {}'.format(args.data_format)) # Get the label annotation column, or None # label_column = None # if args.annotate_labels is not None: # try:
def test_package_valid(self, valid_ancestral_state_package):
    """Tests the calculate_continuous_ancestral_states method.

    Args:
        valid_ancestral_state_package (pytest.fixture): A parameterized
            pytest fixture defined in conftest.py that provides a valid
            test package.

    Raises:
        IOError: When the tree or alignment cannot be loaded for the
            specified file extension.
        Exception: When a specified successful result value cannot be
            found.
    """
    tree_filename, alignment_filename, results_filename = (
        valid_ancestral_state_package)

    # Resolve the tree schema from the file extension.
    schema_by_ext = {'.nex': 'nexus', '.xml': 'nexml', '.tre': 'newick'}
    _, tree_ext = os.path.splitext(tree_filename)
    if tree_ext not in schema_by_ext:
        raise IOError(
            'Cannot handle tree with extension: {}'.format(tree_ext))
    tree = TreeWrapper.get(
        path=tree_filename, schema=schema_by_ext[tree_ext])

    # Load the alignment with the reader that matches its extension.
    _, align_ext = os.path.splitext(alignment_filename)
    if align_ext == '.csv':
        with open(alignment_filename) as align_file:
            sequences, headers = data_readers.read_csv_alignment_flo(
                align_file)
    elif align_ext == '.json':
        with open(alignment_filename) as align_file:
            sequences, headers = data_readers.read_json_alignment_flo(
                align_file)
    elif align_ext == '.phylip':
        with open(alignment_filename) as align_file:
            sequences = data_readers.read_phylip_alignment_flo(align_file)
    elif align_ext == '.tbl':
        with open(alignment_filename) as align_file:
            sequences = data_readers.read_table_alignment_flo(align_file)
    else:
        raise IOError('Cannot handle alignments with extension: {}'.format(
            align_ext))
    char_mtx = data_readers.get_character_matrix_from_sequences_list(
        sequences)

    # Run the analysis.
    _, anc_mtx = anc_dp.calculate_continuous_ancestral_states(
        tree, char_mtx, calc_std_err=True, sum_to_one=False)

    # Expected results: csv with node labels as row headers and variable
    # names as column headers; keep values only (labels dropped).
    expected = []
    with open(results_filename) as results_file:
        next(results_file, None)  # skip the header row
        for line in results_file:
            values = [float(v) for v in line.strip().split(',')[1:]]
            expected.append(np.array(values, dtype=float))

    # Every maximum-likelihood row must match exactly one expected row;
    # matched rows are removed so duplicates are counted correctly.
    for row in anc_mtx[:, :, 0]:
        for idx, exp_row in enumerate(expected):
            # Allow for some wiggle room with decimal precision
            if np.all(np.isclose(row, exp_row)):
                expected.pop(idx)
                break
        else:
            raise Exception(
                'Could not find expected result: {} in results'.format(
                    row))
def test_package_valid(self, valid_ancestral_distribution_package):
    """Tests the calculate_ancestral_distributions method.

    Args:
        valid_ancestral_distribution_package (pytest.fixture): A pytest
            fixture that is parametrized to provide valid ancestral
            distribution packages, one at a time, so that there are
            multiple test functions defined for each valid package.

    Raises:
        IOError: When the tree or alignment cannot be loaded for the
            specified file extension.
        Exception: When a specified successful result value cannot be
            found.
    """
    # Get the data files
    (tree_filename, alignment_filename,
     results_filename) = valid_ancestral_distribution_package

    # Process the tree file
    _, tree_ext = os.path.splitext(tree_filename)
    if tree_ext == '.nex':
        tree_schema = 'nexus'
    elif tree_ext == '.xml':
        tree_schema = 'nexml'
    elif tree_ext == '.tre':
        tree_schema = 'newick'
    else:
        raise IOError(
            'Cannot handle tree with extension: {}'.format(tree_ext))
    tree = TreeWrapper.get(path=tree_filename, schema=tree_schema)

    # Process the alignment file
    _, align_ext = os.path.splitext(alignment_filename)
    if align_ext == '.csv':
        with open(alignment_filename) as align_file:
            sequences, headers = data_readers.read_csv_alignment_flo(
                align_file)
    elif align_ext == '.json':
        with open(alignment_filename) as align_file:
            sequences, headers = data_readers.read_json_alignment_flo(
                align_file)
    elif align_ext == '.phylip':
        with open(alignment_filename) as align_file:
            sequences = data_readers.read_phylip_alignment_flo(align_file)
    elif align_ext == '.tbl':
        with open(alignment_filename) as align_file:
            sequences = data_readers.read_table_alignment_flo(align_file)
    else:
        raise IOError('Cannot handle alignments with extension: {}'.format(
            align_ext))
    char_mtx = data_readers.get_character_matrix_from_sequences_list(
        sequences)

    # Run analysis
    _, anc_mtx = anc_dp.calculate_ancestral_distributions(tree, char_mtx)

    # Expected results csv: row headers are node label plus output layer
    # (maximum_likelihood / standard_error); column headers are variables.
    ml_results = []
    std_err_results = []
    h = None
    with open(results_filename) as results_file:
        for line in results_file:
            if h is None:
                # Get headers
                h = line.strip().split(',')[1:]
            else:
                # parts[0] is the node label, parts[1] the layer name,
                # the remainder the per-variable values.
                parts = line.strip().split(',')
                layer = parts[1].lower()
                values = np.array([float(i) for i in parts[2:]],
                                  dtype=float)
                if layer == 'maximum_likelihood':
                    ml_results.append(values)
                else:
                    std_err_results.append(values)
    assert len(ml_results) == len(std_err_results)

    # Look for all results; ml and std err results should match row-wise,
    # and matched rows are removed so duplicates are counted correctly.
    for row_idx in range(anc_mtx.shape[0]):
        found = False
        ml_row = anc_mtx[row_idx, :, 0]
        std_err_row = anc_mtx[row_idx, :, 1]
        for i in range(len(ml_results)):
            # Allow for some wiggle room with decimal precision
            if np.all(np.isclose(ml_row, ml_results[i])) and \
                    np.all(np.isclose(std_err_row, std_err_results[i])):
                found = True
                ml_results.pop(i)
                std_err_results.pop(i)
                break
        if not found:
            raise Exception('Could not find {}, {} in results'.format(
                ml_row, std_err_row))