def get_pathways_with_multiple_proteoforms():
    """Get the pathways that contain at least one protein with multiple proteoforms.

    The protein -> proteoforms mapping is read from
    "mapping_proteins_to_proteoforms.tsv" under ``config.GRAPHS_PATH``. For
    every pathway id in ``get_pathways()["stId"]`` the protein-level
    interaction network is read from ``config.PATHWAY_GRAPHS_PATH``; when its
    json file does not exist yet, it is created first.

    :return: list of pathway ids, in the order given by ``get_pathways()``,
        whose graph has at least one node that is a protein with more than
        one proteoform.
    """
    map_proteins_to_proteoforms = read_dictionary_one_to_set(
        config.GRAPHS_PATH,
        "mapping_proteins_to_proteoforms.tsv",
        col_indices=(0, 1))
    pathways = get_pathways()["stId"]

    # Keep the selected proteins in a set: membership is tested once per graph
    # node for every pathway, so a list lookup here would be needlessly slow.
    selected_proteins = {
        protein
        for protein, proteoforms in map_proteins_to_proteoforms.items()
        if len(proteoforms) > 1
    }

    selected_pathways = []
    for pathway in pathways:
        filename = get_json_filename(
            config.proteins, config.no_sm, config.PATHWAY_GRAPHS_PATH, pathway)
        # Build the pathway network on demand when it is not on disk yet
        if not Path(filename).exists():
            create_pathway_interaction_network(
                pathway, config.proteins, config.no_sm,
                config.PATHWAY_GRAPHS_PATH)
        G = read_graph(filename)
        # Select the pathway when any of its nodes is a selected protein
        if not selected_proteins.isdisjoint(G.nodes):
            selected_pathways.append(pathway)
    return selected_pathways
def test_read_dictionary_missing_two_columns(self):
    """With a one column file, request default columns 0 and 1, report error"""
    # Create a file with a single column, so the second default column
    # requested by the reader is missing
    file_name = TestCase.id(self) + '_single_column.txt'
    with open(file_name, 'w') as file:
        for x in range(5):
            file.write(f"{x}\n")

    try:
        with self.assertRaises(
                ValueError,
                msg='Should raise an exception because needed columns of the file are missing.'
        ):
            read_dictionary_one_to_set('', file_name, order_pairs=True)
    finally:
        # Remove the precondition file even when the assertion fails, so a
        # failing run does not leave files behind
        os.remove(file_name)
def get_pathways_with_multiple_proteoforms(data_path, output_path):
    """
    Get list of pathways which contain proteins with multiple proteoforms

    :param data_path: directory containing "mapping_proteins_to_proteoforms.tsv"
    :param output_path: path forwarded to ``get_participants_by_pathway`` when
        fetching the participants of each pathway
    :return: list of pathway ids, in the order given by ``get_pathways()``,
        that have at least one participant "Id" which is a protein with more
        than one proteoform
    """
    map_proteins_to_proteoforms = read_dictionary_one_to_set(
        data_path, "mapping_proteins_to_proteoforms.tsv", col_indices=(0, 1))

    # Collect the proteins that have multiple proteoforms. A set is used
    # because membership is tested for every participant of every pathway.
    selected_proteins = {
        protein
        for protein, proteoforms in map_proteins_to_proteoforms.items()
        if len(proteoforms) > 1
    }

    pathways = get_pathways()["stId"]  # Get complete list of pathways

    selected_pathways = []
    for pathway in pathways:
        participants = get_participants_by_pathway(
            pathway, config.proteins, output_path)
        # Select the pathway when any participant is a selected protein
        if not selected_proteins.isdisjoint(participants["Id"]):
            selected_pathways.append(pathway)
    return selected_pathways
def setUp(self):
    """Prepare the fixtures: a letter -> index dictionary and a languages dictionary.

    Writes 'languages.txt' in the current directory with one tab separated
    (language, extension) pair per line and loads it back through
    read_dictionary_one_to_set.
    """
    first_code = ord('A')
    # Map the first 23 uppercase letters to their zero based position
    self.letters = {
        chr(code): code - first_code
        for code in range(first_code, first_code + 23)
    }

    # Some languages are repeated on purpose, with equal and different extensions
    language_extensions = [
        ('C++', '.cpp'),
        ('Python', '.py'),
        ('JavaScript', '.js'),
        ('C++', '.hpp'),
        ('C++', '.cpp'),
    ]
    with open('languages.txt', 'w') as handle:
        handle.writelines(
            f"{language}\t{extension}\n"
            for language, extension in language_extensions)

    self.languages = read_dictionary_one_to_set('./', 'languages.txt')
def test_read_dictionary_missing_index_columns(self):
    """With two columns file, indices other than (0, 1), like (1, 2), show error."""
    # Create a file with only two columns, so column index 2 does not exist
    pairs = [('a', 'b'), ('c', 'b'), ('d', 'e')]
    file_name = TestCase.id(self) + '_pairs.txt'
    with open(file_name, 'w') as file:
        for x, y in pairs:
            file.write(f"{x}\t{y}\n")

    try:
        with self.assertRaises(
                ValueError,
                msg='Should raise an exception because needed columns of the file are missing.'
        ):
            read_dictionary_one_to_set('', file_name, col_indices=(1, 2))
    finally:
        # Remove the precondition file even when the assertion fails, so a
        # failing run does not leave files behind
        os.remove(file_name)
def test_read_dictionary_skip_header(self):
    """With ignore_header=True, the first row of the file is not read as a key, value pair."""
    # Create trio file with headers
    trios = [('Column1', 'Column2', 'Column3'), (1, 1, 2), (2, 3, 2), (3, 4, 5)]
    file_name = TestCase.id(self) + '_pairs.txt'
    with open(file_name, 'w') as file:
        for x, y, z in trios:
            file.write(f"{x}\t{y}\t{z}\n")

    try:
        # Execute target method
        result = read_dictionary_one_to_set('',
                                            file_name,
                                            order_pairs=True,
                                            col_indices=(1, 2),
                                            ignore_header=True)

        # Check headers are not taken as key, value pairs. With
        # col_indices=(1, 2) a header that was not skipped would show up as
        # 'Column2' or 'Column3', never as 'Column1', so all the header names
        # are checked instead of only the first one.
        for header in trios[0]:
            self.assertNotIn(header,
                             result.keys(),
                             msg="Header was taken as key in dictionary")
        self.assertIn('1', result.keys(), msg="Missing key in dictionary")
        self.assertIn('2', result.keys(), msg="Missing key in dictionary")
        self.assertIn('4', result.keys(), msg="Missing key in dictionary")
    finally:
        # Remove precondition files, even when an assertion fails
        os.remove(file_name)


def test_merge_dictionaries(self):
    """Merging two one-to-set dictionaries keeps all keys and unites the sets of shared keys."""
    d1 = {'A': {'B', 'C'}, 'D': {'C'}, 'C': {'d'}}
    d2 = {'A': {'a', 'b', 'c'}, 'B': {'b'}, 'C': {'c', 'd', 'e'}}

    d = merge_dictionaries(d1, d2)

    self.assertEqual(
        4,
        len(d.keys()),
        msg="There is a wrong number of keys in the dictionary")
    # 'A' is in both dictionaries with disjoint sets: 2 + 3 elements
    self.assertEqual(5,
                     len(d['A']),
                     msg="The number of elements in 'A' should be 5")
    self.assertIn('a', d['A'])
    # 'D' only comes from the first dictionary
    self.assertIn('D', d)
    self.assertEqual(1,
                     len(d['D']),
                     msg="The number of elements in 'D' should be 1")
    # 'B' only comes from the second dictionary
    self.assertIn('B', d)
    self.assertEqual(1,
                     len(d['B']),
                     msg="The number of elements in 'B' should be 1")
    # 'C' is in both dictionaries and they share the element 'd'
    self.assertEqual(3,
                     len(d['C']),
                     msg="The number of elements in 'C' should be 3")
    self.assertIn('c', d['C'])
def test_read_dictionary_order_pairs_true(self):
    """With order_pairs=True, each pair is stored with the lexicographically smaller word as key."""
    # Create file with pairs. Some with inverted lexicographic order
    pairs = [('a', 'b'), ('c', 'b'), ('d', 'e')]
    file_name = TestCase.id(self) + '_pairs.txt'
    with open(file_name, 'w') as file:
        for x, y in pairs:
            file.write(f"{x}\t{y}\n")

    try:
        # Execute target method
        result = read_dictionary_one_to_set('', file_name, order_pairs=True)

        # Check the pairs order was corrected, showing them as key and value
        # when word1 < word2 in lexicographical order: ('c', 'b') must be
        # stored under the key 'b'
        self.assertIn(
            'b',
            result.keys(),
            msg="Missing key because it did not order the column values")
        # Expected keys are 'a', 'b' and 'd'
        self.assertEqual(3, len(result.keys()), msg="Wrong number of keys")
    finally:
        # Remove the precondition file even when an assertion fails
        os.remove(file_name)
def test_read_dictionary_indices_1_2(self):
    """With col_indices=(1, 2), keys and values come from the second and third columns."""
    # Create file with three columns, some not in lexicographic order
    trios = [(1, 1, 2), (2, 3, 2), (3, 4, 5)]
    file_name = TestCase.id(self) + '_pairs.txt'
    with open(file_name, 'w') as file:
        for x, y, z in trios:
            file.write(f"{x}\t{y}\t{z}\n")

    try:
        # Execute target method
        result = read_dictionary_one_to_set('',
                                            file_name,
                                            order_pairs=True,
                                            col_indices=(1, 2))

        # Check values are correct. The first column must be ignored and the
        # row (2, 3, 2) is stored under '2' because the pairs are ordered, so
        # '3' must not appear as a key.
        self.assertIn('1', result.keys(), msg="Missing key in dictionary")
        self.assertIn('2', result.keys(), msg="Missing key in dictionary")
        self.assertNotIn('3', result.keys(), msg="Incorrect key in dictionary")
        self.assertIn('4', result.keys(), msg="Missing key in dictionary")
    finally:
        # Remove file, even when an assertion fails
        os.remove(file_name)