예제 #1
0
    def test_assign_step_multiple_functional_elements_all_none_present(self):
        """Check that a step holding two ID/EV groups is assigned NO against an empty cache."""

        # No identifiers are handed to the cache, so none of the evidence below can match.
        cache = AssignmentCache()
        owner_property = GenomeProperty(
            accession_id='GenProp0065',
            name='YOLO',
            property_type="TEMP",
        )

        # A single step (SN 1) described by two ID/EV/TG groups.
        step_rows = [
            ('--', ''),
            ('SN', '1'),
            ('ID', 'Aferr subtype specific proteins'),
            ('EV', 'IPR017545; TIGR03114;'),  # expected NO
            ('TG', 'GO:0043571;'),
            ('ID', 'Yolo subtype specific proteins'),
            ('EV', 'IPR017545; TIGR03115;'),  # expected NO
            ('TG', 'GO:0043571;'),
        ]

        first_step = parse_steps(step_rows)[0]
        # The step is attached to its property before assignment.
        first_step.parent = owner_property

        result = assign_step(cache, first_step)

        # Both the returned value and the value stored in the cache must be NO.
        self.assertEqual(result, 'NO')
        self.assertEqual(cache.get_step_assignment('GenProp0065', 1), 'NO')
예제 #2
0
    def test_assignment_cache_synchronization(self):
        """Check that synchronizing a cache against the test tree keeps one of two cached properties."""

        cache = AssignmentCache()
        for accession in ('GenProp0456', 'GenProp0710'):
            cache.cache_property_assignment(accession, 'YES')

        synchronized = create_synchronized_assignment_cache(cache, self.test_tree)

        # Exactly one property assignment is expected to remain: GenProp0710.
        self.assertEqual(len(synchronized.property_assignments), 1)
        self.assertEqual(synchronized.get_property_assignment('GenProp0710'), 'YES')
예제 #3
0
    def test_genome_property_assignment_two_required_all_present(self):
        """Check that a property is assigned YES when every step's evidence identifier is cached."""

        present_identifiers = ['TIGR03564', 'TIGR03567', 'TIGR03568']
        cache = AssignmentCache(
            interpro_member_database_identifiers=present_identifiers)

        header_rows = [
            ('AC', 'GenProp0089'),
            ('DE', 'Coenzyme F420 utilization'),
            ('TP', 'GUILD'),
        ]

        # Steps 1 and 2 carry RQ 1; step 3 carries RQ 0.
        step_rows = [
            ('--', ''),
            ('SN', '1'),
            ('ID', 'LLM-family F420-associated subfamilies'),
            ('RQ', '1'),
            ('EV', 'IPR019910; TIGR03564; sufficient;'),  # expected YES
            ('--', ''),
            ('SN', '2'),
            ('ID', 'LLM-family F420-associated subfamilies'),
            ('RQ', '1'),
            ('EV', 'IPR019910; TIGR03567; sufficient;'),  # expected YES
            ('--', ''),
            ('SN', '3'),
            ('ID', 'LLM-family F420-associated subfamilies'),
            ('RQ', '0'),
            ('EV', 'IPR019910; TIGR03568; sufficient;'),  # expected YES
        ]

        genome_property = parse_genome_property(header_rows + step_rows)
        genome_property.threshold = 0

        result = assign_genome_property(cache, genome_property)

        self.assertEqual(result, 'YES')
예제 #4
0
    def test_genome_property_assignment_two_required_three_absent(self):
        """Check that a property is assigned NO when none of its three steps has cached evidence."""

        # The cache is created without any identifiers.
        cache = AssignmentCache()

        header_rows = [
            ('AC', 'GenProp0089'),
            ('DE', 'Coenzyme F420 utilization'),
            ('TP', 'GUILD'),
        ]

        # Steps 1 and 2 carry RQ 1; step 3 carries RQ 0.
        step_rows = [
            ('--', ''),
            ('SN', '1'),
            ('ID', 'LLM-family F420-associated subfamilies'),
            ('RQ', '1'),
            ('EV', 'IPR019910; TIGR03564; sufficient;'),  # expected NO
            ('--', ''),
            ('SN', '2'),
            ('ID', 'LLM-family F420-associated subfamilies'),
            ('RQ', '1'),
            ('EV', 'IPR019910; TIGR03567; sufficient;'),  # expected NO
            ('--', ''),
            ('SN', '3'),
            ('ID', 'LLM-family F420-associated subfamilies'),
            ('RQ', '0'),
            ('EV', 'IPR019910; TIGR03568; sufficient;'),  # expected NO
        ]

        genome_property = parse_genome_property(header_rows + step_rows)
        genome_property.threshold = 0

        result = assign_genome_property(cache, genome_property)

        self.assertEqual(result, 'NO')
예제 #5
0
    def test_assign_functional_element_all_identifiers(self):
        """Check that a functional element is assigned YES when all of its evidence identifiers are cached."""

        # The three identifiers below are exactly the ones named by the EV rows.
        cached_identifiers = ['TIGR03114', 'TIGR03117', 'TIGR03120']
        cache = AssignmentCache(
            interpro_member_database_identifiers=cached_identifiers)

        element_rows = [
            ('--', ''),
            ('SN', '1'),
            ('ID', 'Aferr subtype specific proteins'),
            ('RQ', '0'),
            ('EV', 'IPR017545; TIGR03114; sufficient;'),  # present
            ('EV', 'IPR017547; TIGR03117;'),  # present
            ('TG', 'GO:0043571;'),
            ('EV', 'IPR017552; TIGR03120;'),  # present
            ('TG', 'GO:0043573;'),
        ]

        first_element = parse_functional_elements(element_rows)[0]

        result = assign_functional_element(cache, first_element)

        self.assertEqual(result, 'YES')
예제 #6
0
    def test_assign_functional_element_none_sufficient_all_not_present(self):
        """Check that a functional element is assigned NO when no evidence is sufficient and none is cached."""

        # These cached identifiers differ from every identifier named by the EV rows.
        cached_identifiers = ['TIGR03115', 'TIGR03118', 'TIGR03121']
        cache = AssignmentCache(
            interpro_member_database_identifiers=cached_identifiers)

        # None of the EV rows carries the "sufficient" marker.
        element_rows = [
            ('--', ''),
            ('SN', '1'),
            ('ID', 'Aferr subtype specific proteins'),
            ('RQ', '0'),
            ('EV', 'IPR017545; TIGR03114;'),  # absent
            ('EV', 'IPR017547; TIGR03117;'),  # absent
            ('TG', 'GO:0043571;'),
            ('EV', 'IPR017552; TIGR03120;'),  # absent
            ('TG', 'GO:0043573;'),
        ]

        first_element = parse_functional_elements(element_rows)[0]

        result = assign_functional_element(cache, first_element)

        self.assertEqual(result, 'NO')
예제 #7
0
    def test_genome_property_assignment_non_required_none_present(self):
        """Check that the tree's root property is assigned NO against a cache with no identifiers."""

        empty_cache = AssignmentCache()
        root_property = self.tree.root

        result = assign_genome_property(empty_cache, root_property)

        self.assertEqual(result, 'NO')
예제 #8
0
    def test_genome_property_assignment_non_required_one_present(self):
        """Check that the tree's root property is assigned PARTIAL when only TIGR03565 is cached."""

        single_identifier_cache = AssignmentCache(
            interpro_member_database_identifiers=['TIGR03565'])
        root_property = self.tree.root

        result = assign_genome_property(single_identifier_cache, root_property)

        self.assertEqual(result, 'PARTIAL')
예제 #9
0
def parse_genome_property_longform_file(longform_file):
    """
    Builds an assignment cache from a longform genome properties assignment file.

    The file is scanned line by line. A line containing ``PROPERTY:`` sets the
    current property identifier, a line containing ``STEP NUMBER:`` sets the
    current step number, and a line containing ``RESULT:`` records an
    assignment for the current step (when the line also contains ``STEP``) or
    for the current property (otherwise). All other lines are ignored.

    :param longform_file: A longform genome properties assignment file handle object.
                          Its ``name`` (base name without extension) becomes the sample name.
    :return: An assignment cache object.
    """
    sample = splitext(basename(longform_file.name))[0]
    cache = AssignmentCache(sample_name=sample)

    current_property = ''
    current_step = ''

    for line in longform_file:
        if 'PROPERTY:' in line:
            current_property = line.split(':')[1].strip()
            continue

        if 'STEP NUMBER:' in line:
            current_step = int(line.split(':')[1].strip())
            continue

        if 'RESULT:' not in line:
            continue

        # The value after the first colon is the assignment, normalised to upper case.
        result = line.split(':')[1].strip().upper()

        if 'STEP' in line:
            cache.cache_step_assignment(current_property, current_step, result)
        else:
            cache.cache_property_assignment(current_property, result)

    return cache
예제 #10
0
def parse_interproscan_file(interproscan_file):
    """
    Parses InterProScan TSV files into an assignment cache.

    The fifth tab-separated column of every non-empty row is collected as a
    matched member database identifier. Blank lines are skipped.

    :param interproscan_file: A InterProScan file handle object. Its ``name``
                              (base name without extension) becomes the sample name.
    :return: An assignment cache object.
    """
    tsv_reader = csv.reader(interproscan_file, delimiter='\t')

    # csv.reader yields an empty list for a blank line (e.g. a trailing empty
    # line); indexing such a row would raise IndexError, so skip it.
    identifiers = [row[4] for row in tsv_reader if row]

    return AssignmentCache(interpro_member_database_identifiers=identifiers,
                           sample_name=splitext(basename(interproscan_file.name))[0])
예제 #11
0
    def test_get_identifiers(self):
        """Check that the cache reports the identifier of every cached property assignment."""

        cache = AssignmentCache()
        cache.cache_property_assignment('GenProp0067', 'YES')
        cache.cache_property_assignment('GenProp0092', 'NO')

        # Sorted so the comparison does not depend on the order the cache returns.
        found_identifiers = sorted(cache.genome_property_identifiers)

        self.assertEqual(found_identifiers, ['GenProp0067', 'GenProp0092'])
예제 #12
0
    def test_assign_evidence_from_no_interpro_identifiers(self):
        """Check that evidence is assigned NO when the cache was created without any identifiers."""

        empty_cache = AssignmentCache()

        evidence_rows = [
            ('--', ''),
            ('SN', '1'),
            ('ID', 'Aferr subtype specific proteins'),
            ('DN', 'Crispy Proteins'),
            ('RQ', '0'),
            ('EV', 'IPR017545; TIGR03114; sufficient;'),
            ('TG', 'GO:0043571;'),
        ]

        first_evidence = parse_evidences(evidence_rows)[0]

        result = assign_evidence(empty_cache, first_evidence)

        self.assertEqual(result, 'NO')
예제 #13
0
    def test_assign_evidence_when_missing_interpro_identifiers(self):
        """Check that evidence is assigned NO when the cache holds only unrelated identifiers."""

        # None of these matches the TIGR03114 identifier named by the EV row.
        unrelated_identifiers = ['TIGR03192', 'TIGR03193', 'TIGR03194']
        cache = AssignmentCache(
            interpro_member_database_identifiers=unrelated_identifiers)

        evidence_rows = [
            ('--', ''),
            ('SN', '1'),
            ('ID', 'Aferr subtype specific proteins'),
            ('DN', 'Crispy Proteins'),
            ('RQ', '0'),
            ('EV', 'IPR017545; TIGR03114; sufficient;'),
            ('TG', 'GO:0043571;'),
        ]

        first_evidence = parse_evidences(evidence_rows)[0]

        result = assign_evidence(cache, first_evidence)

        self.assertEqual(result, 'NO')
예제 #14
0
    def test_cache_flush(self):
        """Test that flushing one property removes its property and step assignments only."""

        test_cache = AssignmentCache()
        test_cache.cache_property_assignment('GenProp0067', 'YES')
        test_cache.cache_property_assignment('GenProp0092', 'NO')
        test_cache.cache_step_assignment('GenProp0067', 1, 'YES')
        test_cache.cache_step_assignment('GenProp0092', 1, 'NO')

        test_cache.flush_property_from_cache('GenProp0067')

        # Identity check against the None singleton rather than an equality
        # comparison, which a custom __eq__ could satisfy by accident.
        self.assertIsNone(test_cache.get_property_assignment("GenProp0067"))
        self.assertIsNone(test_cache.get_step_assignment("GenProp0067", 1))

        # The entries cached for GenProp0092 must be left in place.
        self.assertEqual(len(test_cache.property_assignments), 1)
        self.assertEqual(len(test_cache.step_assignments), 1)
예제 #15
0
    def setUpClass(cls):
        """
        Build the shared assignment cache and the three-property tree stored on the class.

        NOTE(review): unittest invokes setUpClass on the class itself; the
        @classmethod decorator is not visible in this excerpt -- confirm it is present.
        """

        cache = AssignmentCache()

        property_results = (
            ('GenProp0053', 'YES'),
            ('GenProp0052', 'NO'),
            ('GenProp0051', 'PARTIAL'),
        )
        for accession, result in property_results:
            cache.cache_property_assignment(accession, result)

        # Three step assignments, all belonging to GenProp0053.
        for step_number, result in ((1, 'YES'), (2, 'NO'), (3, 'YES')):
            cache.cache_step_assignment('GenProp0053', step_number, result)

        cls.cache = cache

        # Test Properties Rooted DAG Structure:
        #
        #             --> GenProp0089
        # GenProp0066
        #             --> GenProp0092

        # GenProp0066: its two steps name the other two properties as evidence.
        root_rows = [
            ('AC', 'GenProp0066'),
            ('DE', 'Coenzyme F420 utilization'),
            ('TP', 'GUILD'),
            ('--', ''),
            ('SN', '1'),
            ('ID', 'Selfish genetic elements'),
            ('RQ', '0'),
            ('EV', 'GenProp0089;'),
            ('--', ''),
            ('SN', '2'),
            ('ID', 'Selfish genetic elements'),
            ('RQ', '0'),
            ('EV', 'GenProp0092;'),
        ]

        # GenProp0089: one step with evidence TIGR03564.
        first_child_rows = [
            ('AC', 'GenProp0089'),
            ('DE', 'Coenzyme F420 utilization'),
            ('TP', 'GUILD'),
            ('--', ''),
            ('SN', '1'),
            ('ID', 'LLM-family F420-associated subfamilies'),
            ('RQ', '0'),
            ('EV', 'IPR019910; TIGR03564; sufficient;'),
        ]

        # GenProp0092: one step with evidence TIGR03565.
        second_child_rows = [
            ('AC', 'GenProp0092'),
            ('DE', 'Coenzyme F420 utilization'),
            ('TP', 'GUILD'),
            ('--', ''),
            ('SN', '1'),
            ('ID', 'LLM-family F420-associated subfamilies'),
            ('RQ', '0'),
            ('EV', 'IPR019910; TIGR03565; sufficient;'),
        ]

        parsed_properties = [
            parse_genome_property(rows)
            for rows in (root_rows, first_child_rows, second_child_rows)
        ]

        cls.tree = GenomePropertiesTree(*parsed_properties)