def test_assign_step_multiple_functional_elements_all_none_present(self):
    """Check a step with two functional elements is assigned 'NO' when no evidence is cached."""
    # The cache is created without any InterPro member database identifiers.
    empty_cache = AssignmentCache()
    owning_property = GenomeProperty(accession_id='GenProp0065',
                                     name='YOLO',
                                     property_type="TEMP")

    step_rows = [
        ('--', ''),
        ('SN', '1'),
        ('ID', 'Aferr subtype specific proteins'),
        ('EV', 'IPR017545; TIGR03114;'),  # NO
        ('TG', 'GO:0043571;'),
        ('ID', 'Yolo subtype specific proteins'),
        ('EV', 'IPR017545; TIGR03115;'),  # NO
        ('TG', 'GO:0043571;'),
    ]

    first_step = parse_steps(step_rows)[0]
    first_step.parent = owning_property

    result = assign_step(empty_cache, first_step)

    self.assertEqual(result, 'NO')
    # The step result is also expected to be readable back from the cache
    # under the parent property's accession and the step number.
    self.assertEqual(empty_cache.get_step_assignment('GenProp0065', 1), 'NO')
def test_assignment_cache_synchronization(self):
    """Check that synchronizing a cache against the test tree keeps only GenProp0710."""
    unsynced_cache = AssignmentCache()
    tree = self.test_tree

    # Two properties are cached as 'YES'; only one is expected to survive
    # synchronization (presumably GenProp0456 is absent from the tree).
    for identifier in ('GenProp0456', 'GenProp0710'):
        unsynced_cache.cache_property_assignment(identifier, 'YES')

    synced_cache = create_synchronized_assignment_cache(unsynced_cache, tree)

    self.assertEqual(len(synced_cache.property_assignments), 1)
    self.assertEqual(synced_cache.get_property_assignment('GenProp0710'), 'YES')
def test_genome_property_assignment_two_required_all_present(self):
    """Check a property with two required steps is 'YES' when every step's evidence is cached."""
    # All three TIGRFAM identifiers referenced by the steps below are cached.
    populated_cache = AssignmentCache(
        interpro_member_database_identifiers=['TIGR03564', 'TIGR03567', 'TIGR03568'])

    rows = [
        ('AC', 'GenProp0089'),
        ('DE', 'Coenzyme F420 utilization'),
        ('TP', 'GUILD'),
        # Step 1 (required)
        ('--', ''),
        ('SN', '1'),
        ('ID', 'LLM-family F420-associated subfamilies'),
        ('RQ', '1'),
        ('EV', 'IPR019910; TIGR03564; sufficient;'),  # YES
        # Step 2 (required)
        ('--', ''),
        ('SN', '2'),
        ('ID', 'LLM-family F420-associated subfamilies'),
        ('RQ', '1'),
        ('EV', 'IPR019910; TIGR03567; sufficient;'),  # YES
        # Step 3 (not required)
        ('--', ''),
        ('SN', '3'),
        ('ID', 'LLM-family F420-associated subfamilies'),
        ('RQ', '0'),
        ('EV', 'IPR019910; TIGR03568; sufficient;'),  # YES
    ]

    genome_property = parse_genome_property(rows)
    genome_property.threshold = 0

    result = assign_genome_property(populated_cache, genome_property)
    self.assertEqual(result, 'YES')
def test_genome_property_assignment_two_required_three_absent(self):
    """Check a property with two required steps is 'NO' when no step evidence is cached."""
    # The cache is created without any InterPro member database identifiers.
    empty_cache = AssignmentCache()

    rows = [
        ('AC', 'GenProp0089'),
        ('DE', 'Coenzyme F420 utilization'),
        ('TP', 'GUILD'),
        # Step 1 (required)
        ('--', ''),
        ('SN', '1'),
        ('ID', 'LLM-family F420-associated subfamilies'),
        ('RQ', '1'),
        ('EV', 'IPR019910; TIGR03564; sufficient;'),  # NO
        # Step 2 (required)
        ('--', ''),
        ('SN', '2'),
        ('ID', 'LLM-family F420-associated subfamilies'),
        ('RQ', '1'),
        ('EV', 'IPR019910; TIGR03567; sufficient;'),  # NO
        # Step 3 (not required)
        ('--', ''),
        ('SN', '3'),
        ('ID', 'LLM-family F420-associated subfamilies'),
        ('RQ', '0'),
        ('EV', 'IPR019910; TIGR03568; sufficient;'),  # NO
    ]

    genome_property = parse_genome_property(rows)
    genome_property.threshold = 0

    result = assign_genome_property(empty_cache, genome_property)
    self.assertEqual(result, 'NO')
def test_assign_functional_element_all_identifiers(self):
    """Check a functional element is 'YES' when every one of its evidence identifiers is cached."""
    populated_cache = AssignmentCache(
        interpro_member_database_identifiers=['TIGR03114', 'TIGR03117', 'TIGR03120'])

    element_rows = [
        ('--', ''),
        ('SN', '1'),
        ('ID', 'Aferr subtype specific proteins'),
        ('RQ', '0'),
        ('EV', 'IPR017545; TIGR03114; sufficient;'),  # Yes
        ('EV', 'IPR017547; TIGR03117;'),  # Yes
        ('TG', 'GO:0043571;'),
        ('EV', 'IPR017552; TIGR03120;'),  # Yes
        ('TG', 'GO:0043573;'),
    ]

    first_element = parse_functional_elements(element_rows)[0]

    result = assign_functional_element(populated_cache, first_element)
    self.assertEqual(result, 'YES')
def test_assign_functional_element_none_sufficient_all_not_present(self):
    """Check a functional element is 'NO' when no evidence is sufficient and none is cached."""
    # The cached identifiers deliberately differ from the ones the element's
    # evidence refers to (TIGR03114, TIGR03117, TIGR03120).
    unrelated_cache = AssignmentCache(
        interpro_member_database_identifiers=['TIGR03115', 'TIGR03118', 'TIGR03121'])

    element_rows = [
        ('--', ''),
        ('SN', '1'),
        ('ID', 'Aferr subtype specific proteins'),
        ('RQ', '0'),
        ('EV', 'IPR017545; TIGR03114;'),  # NO
        ('EV', 'IPR017547; TIGR03117;'),  # NO
        ('TG', 'GO:0043571;'),
        ('EV', 'IPR017552; TIGR03120;'),  # NO
        ('TG', 'GO:0043573;'),
    ]

    first_element = parse_functional_elements(element_rows)[0]

    result = assign_functional_element(unrelated_cache, first_element)
    self.assertEqual(result, 'NO')
def test_genome_property_assignment_non_required_none_present(self):
    """Check the tree's root property is 'NO' when the cache holds no identifiers."""
    empty_cache = AssignmentCache()
    root_property = self.tree.root

    result = assign_genome_property(empty_cache, root_property)

    self.assertEqual(result, 'NO')
def test_genome_property_assignment_non_required_one_present(self):
    """Check the tree's root property is 'PARTIAL' when only TIGR03565 is cached."""
    single_hit_cache = AssignmentCache(interpro_member_database_identifiers=['TIGR03565'])
    root_property = self.tree.root

    result = assign_genome_property(single_hit_cache, root_property)

    self.assertEqual(result, 'PARTIAL')
def parse_genome_property_longform_file(longform_file):
    """
    Builds an assignment cache from a longform genome properties assignment file.

    Lines are scanned in order. A 'PROPERTY:' line sets the current property
    identifier, a 'STEP NUMBER:' line sets the current step number, and a
    'RESULT:' line is cached as a step result when it also contains 'STEP',
    otherwise as a property result. All other lines are ignored.

    :param longform_file: A longform genome properties assignment file handle object.
                          The base of its name (without extension) becomes the sample name.
    :return: An assignment cache object.
    """
    sample_name = splitext(basename(longform_file.name))[0]
    assignment_cache = AssignmentCache(sample_name=sample_name)

    current_property_id = ''
    current_step_number = ''

    for line in longform_file:
        if 'PROPERTY:' in line:
            current_property_id = line.split(':')[1].strip()
            continue

        if 'STEP NUMBER:' in line:
            current_step_number = int(line.split(':')[1].strip())
            continue

        if 'RESULT:' not in line:
            continue

        # Results are stored upper-cased.
        result = line.split(':')[1].strip().upper()

        if 'STEP' in line:
            assignment_cache.cache_step_assignment(current_property_id,
                                                   current_step_number,
                                                   result)
        else:
            assignment_cache.cache_property_assignment(current_property_id, result)

    return assignment_cache
def parse_interproscan_file(interproscan_file):
    """
    Parses InterProScan TSV files into an assignment cache.

    The fifth tab-separated column of every row is collected as a matched
    InterPro member database identifier. Blank lines (for example a trailing
    empty line at the end of the file) are skipped instead of raising an
    IndexError. Non-empty rows with fewer than five columns still raise
    IndexError so that malformed input is not silently ignored.

    :param interproscan_file: A InterProScan file handle object. The base of its
                              name (without extension) becomes the sample name.
    :return: An assignment cache object.
    """
    tsv_reader = csv.reader(interproscan_file, delimiter='\t')

    # csv.reader yields an empty list for a blank line, so filter those out
    # before indexing into the row.
    identifiers = [row[4] for row in tsv_reader if row]

    sample_name = splitext(basename(interproscan_file.name))[0]

    return AssignmentCache(interpro_member_database_identifiers=identifiers,
                           sample_name=sample_name)
def test_get_identifiers(self):
    """Check the cache reports the identifiers of every property assigned to it."""
    populated_cache = AssignmentCache()
    for identifier, result in (('GenProp0067', 'YES'), ('GenProp0092', 'NO')):
        populated_cache.cache_property_assignment(identifier, result)

    # Sort so the comparison does not depend on the order the cache returns.
    found_identifiers = sorted(populated_cache.genome_property_identifiers)

    self.assertEqual(found_identifiers, ['GenProp0067', 'GenProp0092'])
def test_assign_evidence_from_no_interpro_identifiers(self):
    """Check evidence is assigned 'NO' when the cache holds no InterPro identifiers."""
    empty_cache = AssignmentCache()

    evidence_rows = [
        ('--', ''),
        ('SN', '1'),
        ('ID', 'Aferr subtype specific proteins'),
        ('DN', 'Crispy Proteins'),
        ('RQ', '0'),
        ('EV', 'IPR017545; TIGR03114; sufficient;'),
        ('TG', 'GO:0043571;'),
    ]

    first_evidence = parse_evidences(evidence_rows)[0]

    result = assign_evidence(empty_cache, first_evidence)
    self.assertEqual(result, 'NO')
def test_assign_evidence_when_missing_interpro_identifiers(self):
    """Check evidence is assigned 'NO' when the cache lacks the identifier the evidence needs."""
    # The cached identifiers do not include TIGR03114, which the evidence row refers to.
    unrelated_cache = AssignmentCache(
        interpro_member_database_identifiers=['TIGR03192', 'TIGR03193', 'TIGR03194'])

    evidence_rows = [
        ('--', ''),
        ('SN', '1'),
        ('ID', 'Aferr subtype specific proteins'),
        ('DN', 'Crispy Proteins'),
        ('RQ', '0'),
        ('EV', 'IPR017545; TIGR03114; sufficient;'),
        ('TG', 'GO:0043571;'),
    ]

    first_evidence = parse_evidences(evidence_rows)[0]

    result = assign_evidence(unrelated_cache, first_evidence)
    self.assertEqual(result, 'NO')
def test_cache_flush(self):
    """Test that flushing one property removes its property and step assignments only."""
    test_cache = AssignmentCache()

    test_cache.cache_property_assignment('GenProp0067', 'YES')
    test_cache.cache_property_assignment('GenProp0092', 'NO')
    test_cache.cache_step_assignment('GenProp0067', 1, 'YES')
    test_cache.cache_step_assignment('GenProp0092', 1, 'NO')

    test_cache.flush_property_from_cache('GenProp0067')

    # assertIsNone checks identity with None and reports a clearer failure
    # message than assertEqual(..., None).
    self.assertIsNone(test_cache.get_property_assignment("GenProp0067"))
    self.assertIsNone(test_cache.get_step_assignment("GenProp0067", 1))

    # The other property (GenProp0092) must be left untouched by the flush.
    self.assertEqual(len(test_cache.property_assignments), 1)
    self.assertEqual(len(test_cache.step_assignments), 1)
def setUpClass(cls):
    """Build the shared fixtures: a pre-populated assignment cache and a three-property tree."""
    shared_cache = AssignmentCache()

    for identifier, result in (('GenProp0053', 'YES'),
                               ('GenProp0052', 'NO'),
                               ('GenProp0051', 'PARTIAL')):
        shared_cache.cache_property_assignment(identifier, result)

    for step_number, result in ((1, 'YES'), (2, 'NO'), (3, 'YES')):
        shared_cache.cache_step_assignment('GenProp0053', step_number, result)

    cls.cache = shared_cache

    # Test Properties Rooted DAG Structure:
    #
    #             --> GenProp0089
    # GenProp0066
    #             --> GenProp0092

    root_rows = [
        ('AC', 'GenProp0066'),
        ('DE', 'Coenzyme F420 utilization'),
        ('TP', 'GUILD'),
        ('--', ''),
        ('SN', '1'),
        ('ID', 'Selfish genetic elements'),
        ('RQ', '0'),
        ('EV', 'GenProp0089;'),
        ('--', ''),
        ('SN', '2'),
        ('ID', 'Selfish genetic elements'),
        ('RQ', '0'),
        ('EV', 'GenProp0092;'),
    ]

    first_child_rows = [
        ('AC', 'GenProp0089'),
        ('DE', 'Coenzyme F420 utilization'),
        ('TP', 'GUILD'),
        ('--', ''),
        ('SN', '1'),
        ('ID', 'LLM-family F420-associated subfamilies'),
        ('RQ', '0'),
        ('EV', 'IPR019910; TIGR03564; sufficient;'),
    ]

    second_child_rows = [
        ('AC', 'GenProp0092'),
        ('DE', 'Coenzyme F420 utilization'),
        ('TP', 'GUILD'),
        ('--', ''),
        ('SN', '1'),
        ('ID', 'LLM-family F420-associated subfamilies'),
        ('RQ', '0'),
        ('EV', 'IPR019910; TIGR03565; sufficient;'),
    ]

    root_property = parse_genome_property(root_rows)
    first_child = parse_genome_property(first_child_rows)
    second_child = parse_genome_property(second_child_rows)

    cls.tree = GenomePropertiesTree(root_property, first_child, second_child)