예제 #1
0
    def test_find_closer_to_pivot(self):
        """Check which Tc value ``find_closer_to_pivot`` selects for a material pivot.

        With the candidate list holding all ``<tcValue>`` tokens of the
        sentence, both the first and the second ``<material>`` token are
        expected to resolve to "1.6 K".
        """
        # Named ``text`` rather than ``input`` so the builtin is not shadowed.
        text = (
            "Havinga et al systematically changed n from 3.00 to 4.00 by synthesizing LaTl 3"
            " (n=3.00, T c =1.6 K), LaPb 3 (n=3.75, T c =4.1 K), and "
            "ThPb 3 (n=4.00, T c =5.6 K) and the solid solutions "
            "La (Tl 1−x Pb x ) 3 and (La 1−x Th x )Pb 3 ."
        )

        spans = [("LaTl 3", "<material>"), ("T c", "<tc>"), ("1.6 K", "<tcValue>"),
                 ("LaPb 3", "<material>"), ("T c", "<tc>"), ("4.1 K", "<tcValue>"),
                 ("ThPb 3", "<material>"), ("T c", "<tc>"), ("5.6 K", "<tcValue>"),
                 ("La (Tl 1−x Pb x ) 3", "<material>"), ("(La 1−x Th x )Pb 3", "<material>")]

        doc = prepare_doc(text, spans)

        materials = [token for token in doc if token.ent_type_ == '<material>']
        tc_values = [token for token in doc if token.ent_type_ == '<tcValue>']

        target = VicinityResolutionResolver()

        closer_entity = target.find_closer_to_pivot(materials[0], tc_values)
        # Identity check: comparing to None with ``!=`` would go through the
        # object's own rich-comparison implementation.
        assert closer_entity is not None
        assert closer_entity.text == "1.6 K"

        closer_entity = target.find_closer_to_pivot(materials[1], tc_values)
        assert closer_entity is not None
        assert closer_entity.text == "1.6 K"
예제 #2
0
    def test_vicinityResolution_5(self):
        """Expect two material/Tc pairs from a sentence that also mentions material classes.

        "sulfur hydride" must be paired with "200 K" and
        "HgBa 2 Ca 2 Cu 3 O 9" with "up to 164 K".
        """
        text = (
            "In fact, apart from the very recent discovery of the occurrence of a superconducting "
            "phase at 200 K in sulfur hydride systems under ultrahigh pressures (up to 150 GPa) , "
            "the highest T c materials found up until now can be grouped into two families: the "
            "cuprates, with T c of up to 164 K [5] (in HgBa 2 Ca 2 Cu 3 O 9 at 30 GPa), and "
            "Fe-pnictides and -chalcogenides (FPC) with T c of up to 55 K [6]."
        )

        annotations = [
            ("200 K", "<tcValue>"),
            ("sulfur hydride", "<material>"),
            ("highest T c", "<tc>"),
            ("cuprates", "<class>"),
            ("T c", "<tc>"),
            ("up to 164 K", "<tcValue>"),
            ("HgBa 2 Ca 2 Cu 3 O 9", "<material>"),
            ("Fe-pnictides and -chalcogenides", "<class>"),
            ("T c", "<tc>"),
            ("up to 55 K", "<tcValue>"),
        ]

        doc = prepare_doc(text, annotations)

        materials = [token for token in doc if token.ent_type_ == '<material>']
        tc_values = [token for token in doc if token.ent_type_ == '<tcValue>']

        resolver = VicinityResolutionResolver()
        relationships = resolver.find_relationships(doc, materials, tc_values)

        assert len(relationships) == 2

        first, second = relationships[0], relationships[1]
        assert str(first[0]) == "sulfur hydride"
        assert str(first[1]) == "200 K"

        assert str(second[0]) == "HgBa 2 Ca 2 Cu 3 O 9"
        assert str(second[1]) == "up to 164 K"
예제 #3
0
    def test_vicinityResolution_respectively_4(self):
        """Expect the three listed Bi compounds to be paired in order with 20 K, 85 K and 110 K.

        The generic formula annotated first as a material is not expected
        among the three returned relationships.
        """
        text = (
            "In this paper, we look at the Bi-based materials that have the chemical formula "
            "Bi 2 Sr 2 Ca n-1 Cu n O 2n+4 (BiSCCO) where n=1, 2, 3 gives the first three members of this "
            "class: Bi 2 Sr 2 CuO 6 , Bi 2 Sr 2 CaCu 2 O 8 and Bi 2 Sr 2 Ca 2 Cu 3 O 10 , with critical "
            "temperatures ( ) T c of 20 K, 85 K and 110 K respectively."
        )

        annotations = [
            ("Bi 2 Sr 2 Ca n-1 Cu n O 2n+4 (BiSCCO)", "<material>"),
            ("Bi 2 Sr 2 CuO 6", "<material>"),
            ("Bi 2 Sr 2 CaCu 2 O 8", "<material>"),
            ("Bi 2 Sr 2 Ca 2 Cu 3 O 10", "<material>"),
            ("T c", "<tc>"),
            ("20 K", "<tcValue>"),
            ("85 K", "<tcValue>"),
            ("110 K", "<tcValue>"),
        ]

        doc = prepare_doc(text, annotations)

        materials = [token for token in doc if token.ent_type_ == '<material>']
        tc_values = [token for token in doc if token.ent_type_ == '<tcValue>']

        resolver = VicinityResolutionResolver()
        relationships = resolver.find_relationships(doc, materials, tc_values)
        assert len(relationships) == 3

        expected_pairs = [
            ("Bi 2 Sr 2 CuO 6", "20 K"),
            ("Bi 2 Sr 2 CaCu 2 O 8", "85 K"),
            ("Bi 2 Sr 2 Ca 2 Cu 3 O 10", "110 K"),
        ]
        # Compare pair by pair, material first and then its Tc value.
        for position, (material_text, tc_text) in enumerate(expected_pairs):
            assert str(relationships[position][0]) == material_text
            assert str(relationships[position][1]) == tc_text
예제 #4
0
    def test_calculate_distances_2(self):
        """Check the distances ``calculate_distances`` reports for two materials and one Tc value.

        The result is indexed first by material token and then by Tc token;
        the expected values are 27.0 for the first material and 23.5 for the
        second.
        """
        text = (
            "Havinga et al systematically changed n from 3.00 to 4.00 by synthesizing LaTl 3. "
            "T c = 1.6 K is then found in LaPb 3."
        )

        annotations = [
            ("LaTl 3", "<material>"),
            ("T c", "<tc>"),
            ("1.6 K", "<tcValue>"),
            ("LaPb 3", "<material>"),
        ]

        doc = prepare_doc(text, annotations)

        materials = [token for token in doc if token.ent_type_ == '<material>']
        tc_values = [token for token in doc if token.ent_type_ == '<tcValue>']

        resolver = VicinityResolutionResolver()
        distances = resolver.calculate_distances(materials, tc_values, doc)

        assert len(distances) == 2

        only_tc = tc_values[0]
        assert distances[materials[0]][only_tc] == 27.0
        assert distances[materials[1]][only_tc] == 23.5
예제 #5
0
    def test_calculate_distances(self):
        """Check ``calculate_distances`` on a sentence with five materials and three Tc values.

        One entry per material is expected; each of the first three
        materials is checked against the Tc value at the same index.
        """
        text = (
            "Havinga et al systematically changed n from 3.00 to 4.00 by synthesizing LaTl 3"
            " (n=3.00, T c =1.6 K), LaPb 3 (n=3.75, T c =4.1 K), and "
            "ThPb 3 with T c =5.6 K and the solid solutions "
            "La (Tl 1−x Pb x ) 3 and (La 1−x Th x )Pb 3 ."
        )

        annotations = [
            ("LaTl 3", "<material>"), ("T c", "<tc>"), ("1.6 K", "<tcValue>"),
            ("LaPb 3", "<material>"), ("T c", "<tc>"), ("4.1 K", "<tcValue>"),
            ("ThPb 3", "<material>"), ("T c", "<tc>"), ("5.6 K", "<tcValue>"),
            ("La (Tl 1−x Pb x ) 3", "<material>"),
            ("(La 1−x Th x )Pb 3", "<material>"),
        ]

        doc = prepare_doc(text, annotations)

        materials = [token for token in doc if token.ent_type_ == '<material>']
        tc_values = [token for token in doc if token.ent_type_ == '<tcValue>']

        resolver = VicinityResolutionResolver()
        distances = resolver.calculate_distances(materials, tc_values, doc)

        assert len(distances) == 5

        first_distance = distances[materials[0]][tc_values[0]]
        assert first_distance == 7.5

        second_distance = distances[materials[1]][tc_values[1]]
        assert second_distance == 7.5

        third_distance = distances[materials[2]][tc_values[2]]
        assert third_distance == 18
예제 #6
0
    def test_vicinityResolution_respectively_2(self):
        """Expect a single pair when two materials compete for one Tc value.

        Only "La 3 Ir 2 Ge 2" is expected to be linked with "4.7 K".
        """
        text = (
            "The critical temperature T C = 4.7 K discovered for La 3 Ir 2 Ge 2 in this work is by about 1.2 K "
            "higher than that found for La 3 Rh 2 Ge 2 ."
        )

        annotations = [
            ("critical temperature", "<tc>"),
            ("T C", "<tc>"),
            ("4.7 K", "<tcValue>"),
            ("La 3 Ir 2 Ge 2", "<material>"),
            ("La 3 Rh 2 Ge 2", "<material>"),
        ]

        doc = prepare_doc(text, annotations)

        materials = [token for token in doc if token.ent_type_ == '<material>']
        tc_values = [token for token in doc if token.ent_type_ == '<tcValue>']

        resolver = VicinityResolutionResolver()
        relationships = resolver.find_relationships(doc, materials, tc_values)

        assert len(relationships) == 1

        only_pair = relationships[0]
        assert str(only_pair[0]) == "La 3 Ir 2 Ge 2"
        assert str(only_pair[1]) == "4.7 K"
예제 #7
0
    def test_vicinityResolution_respectively_missingEntities_1(self):
        """Expect one pair when only one of the two materials in the sentence is annotated.

        The annotated material "BaPb 1−x Bi x O 3−δ (BPBO)" is expected to be
        linked with "30 K", although two Tc values are annotated.
        """
        text = (
            "Ba 1−x K x BiO 3−δ (BKBO) and BaPb 1−x Bi x O 3−δ (BPBO) are two such compounds that show T c 's "
            "of 30 K [1] and 13 K [2], respectively, with carrier concentrations as low as 2×10 "
            "21 cm −3 ."
        )

        annotations = [
            ("BaPb 1−x Bi x O 3−δ (BPBO)", "<material>"),
            ("T c", "<tc>"),
            ("30 K", "<tcValue>"),
            ("13 K", "<tcValue>"),
        ]

        doc = prepare_doc(text, annotations)

        materials = [token for token in doc if token.ent_type_ == '<material>']
        tc_values = [token for token in doc if token.ent_type_ == '<tcValue>']

        resolver = VicinityResolutionResolver()
        relationships = resolver.find_relationships(doc, materials, tc_values)
        assert len(relationships) == 1

        only_pair = relationships[0]
        assert str(only_pair[0]) == "BaPb 1−x Bi x O 3−δ (BPBO)"
        assert str(only_pair[1]) == "30 K"
예제 #8
0
    def test_vicinityResolution_3(self):
        """Expect three relationships for five annotated materials and three Tc values."""
        text = (
            "Havinga et al systematically changed n from 3.00 to 4.00 by synthesizing LaTl 3"
            " (n=3.00, T c =1.6 K), LaPb 3 (n=3.75, T c =4.1 K), and "
            "ThPb 3 (n=4.00, T c =5.6 K) and the solid solutions "
            "La (Tl 1−x Pb x ) 3 and (La 1−x Th x )Pb 3 ."
        )

        annotations = [
            ("LaTl 3", "<material>"), ("T c", "<tc>"), ("1.6 K", "<tcValue>"),
            ("LaPb 3", "<material>"), ("T c", "<tc>"), ("4.1 K", "<tcValue>"),
            ("ThPb 3", "<material>"), ("T c", "<tc>"), ("5.6 K", "<tcValue>"),
            ("La (Tl 1−x Pb x ) 3", "<material>"),
            ("(La 1−x Th x )Pb 3", "<material>"),
        ]

        doc = prepare_doc(text, annotations)

        materials = [token for token in doc if token.ent_type_ == '<material>']
        tc_values = [token for token in doc if token.ent_type_ == '<tcValue>']

        resolver = VicinityResolutionResolver()
        relationships = resolver.find_relationships(doc, materials, tc_values)

        assert len(relationships) == 3
예제 #9
0
    def test_vicinityResolution_missingOneEntity_1(self):
        """Expect two pairs when the Tc value of the first material is not annotated.

        "MgB 2" has no annotated Tc value ("39 K" is absent from the spans),
        so only the second and third materials are expected to be linked.
        """
        text = (
            "Superconductivity has been discovered in metal diborides like MgB 2 (T c =39 K ), (Mo 0.96 Zr 0.04 ) "
            "0.85 B 2 (T c =8.2 K ), NbB 2 (T c =5.2 K [3]) and various other ternary borides ."
        )

        annotations = [
            ("MgB 2", "<material>"), ("T c", "<tc>"),
            ("(Mo 0.96 Zr 0.04 ) 0.85 B 2", "<material>"), ("T c", "<tc>"), ("8.2 K", "<tcValue>"),
            ("NbB 2", "<material>"), ("T c", "<tc>"), ("5.2 K", "<tcValue>"),
        ]

        doc = prepare_doc(text, annotations)

        materials = [token for token in doc if token.ent_type_ == '<material>']
        tc_values = [token for token in doc if token.ent_type_ == '<tcValue>']

        resolver = VicinityResolutionResolver()
        relationships = resolver.find_relationships(doc, materials, tc_values)
        assert len(relationships) == 2

        first, second = relationships[0], relationships[1]
        assert str(first[0]) == "(Mo 0.96 Zr 0.04 ) 0.85 B 2"
        assert str(first[1]) == "8.2 K"

        assert str(second[0]) == "NbB 2"
        assert str(second[1]) == "5.2 K"
예제 #10
0
    def test_vicinityResolution_respectively_1(self):
        """Expect three ordered pairs when the Tc values precede the materials ("respectively").

        The three materials must be matched, in order, with "38 K", "25 K"
        and "38 K".
        """
        text = (
            "In the best cases a transition temperature of 38 K (zero resistance point), 25 K (zero "
            "resistance point) and 38 K (midpoint) were measured for CCO/STO, CCO/BCO and LSCO/LCO, "
            "respectively."
        )

        annotations = [
            ("38 K", "<tcValue>"),
            ("25 K", "<tcValue>"),
            ("38 K", "<tcValue>"),
            ("CCO/STO", "<material>"),
            ("CCO/BCO", "<material>"),
            ("LSCO/LCO", "<material>"),
        ]
        doc = prepare_doc(text, annotations)

        materials = [token for token in doc if token.ent_type_ == '<material>']
        tc_values = [token for token in doc if token.ent_type_ == '<tcValue>']

        resolver = VicinityResolutionResolver()
        relationships = resolver.find_relationships(doc, materials, tc_values)

        assert len(relationships) == 3

        expected_pairs = [
            ("CCO/STO", "38 K"),
            ("CCO/BCO", "25 K"),
            ("LSCO/LCO", "38 K"),
        ]
        # Compare pair by pair, material first and then its Tc value.
        for position, (material_text, tc_text) in enumerate(expected_pairs):
            assert str(relationships[position][0]) == material_text
            assert str(relationships[position][1]) == tc_text
예제 #11
0
    def test_vicinityResolution_4(self):
        """Expect "MnSi films" -> "43 K" and "MnSi" -> "29 K" from a two-sentence input."""
        text = (
            "The investigated MnSi films are in a thickness regime where the magnetic transition "
            "temperature T c assumes a thickness-independent enhanced value of 43 K as compared with "
            "that of bulk MnSi, where T c ≈ 29 K. A detailed refinement of the EXAFS data reveals that "
            "the Mn positions are unchanged, whereas the Si positions vary along the out-of-plane "
            "direction, alternating in orientation from unit cell to unit cell."
        )

        annotations = [
            ("MnSi films", "<material>"), ("T c", "<tc>"), ("43 K", "<tcValue>"),
            ("MnSi", "<material>"), ("T c", "<tc>"), ("29 K", "<tcValue>"),
        ]

        doc = prepare_doc(text, annotations)

        materials = [token for token in doc if token.ent_type_ == '<material>']
        tc_values = [token for token in doc if token.ent_type_ == '<tcValue>']

        resolver = VicinityResolutionResolver()
        relationships = resolver.find_relationships(doc, materials, tc_values)

        assert len(relationships) == 2

        first, second = relationships[0], relationships[1]
        assert str(first[0]) == "MnSi films"
        assert str(first[1]) == "43 K"

        assert str(second[0]) == "MnSi"
        assert str(second[1]) == "29 K"
예제 #12
0
    def test_find_following_entity(self):
        """Exercise ``find_following_entity`` with and without the optional type argument.

        Cases covered:
        * a material followed by a Tc value in a Tc-only candidate list;
        * a material followed by any annotated entity;
        * the last material, which has nothing after it;
        * a Tc value searched for a following ``<material>`` in a list that
          contains no materials;
        * a Tc value searched for a following ``<material>`` among all
          annotated entities.
        """
        # Named ``text`` rather than ``input`` so the builtin is not shadowed.
        text = (
            "Havinga et al systematically changed n from 3.00 to 4.00 by synthesizing LaTl 3"
            " (n=3.00, T c =1.6 K), LaPb 3 (n=3.75, T c =4.1 K), and "
            "ThPb 3 (n=4.00, T c =5.6 K) and the solid solutions "
            "La (Tl 1−x Pb x ) 3 and (La 1−x Th x )Pb 3 ."
        )

        spans = [("LaTl 3", "<material>"), ("T c", "<tc>"), ("1.6 K", "<tcValue>"),
                 ("LaPb 3", "<material>"), ("T c", "<tc>"), ("4.1 K", "<tcValue>"),
                 ("ThPb 3", "<material>"), ("T c", "<tc>"), ("5.6 K", "<tcValue>"),
                 ("La (Tl 1−x Pb x ) 3", "<material>"), ("(La 1−x Th x )Pb 3", "<material>")]

        doc = prepare_doc(text, spans)

        materials = [token for token in doc if token.ent_type_ == '<material>']
        tc_values = [token for token in doc if token.ent_type_ == '<tcValue>']

        target = VicinityResolutionResolver()
        following = target.find_following_entity(materials[1], tc_values)

        assert following is not None
        assert following.text == "4.1 K"

        # Value comparison, not identity: ``is not ""`` depends on string
        # interning and raises a SyntaxWarning on Python 3.8+.
        all_entities = [token for token in doc if token.ent_type_ != ""]

        following = target.find_following_entity(materials[2], all_entities)

        assert following is not None
        assert following.text == "T c"

        following = target.find_following_entity(materials[4], all_entities)
        assert following is None

        following = target.find_following_entity(tc_values[0], tc_values, "<material>")
        assert following is None

        following = target.find_following_entity(tc_values[2], all_entities, "<material>")
        assert following is not None
        assert following.text == "La (Tl 1−x Pb x ) 3"
예제 #13
0
    def process_sentence(self, words, spaces, spans):
        """Extract relationships between source and destination entities of one sentence.

        The sentence text is rebuilt from ``words`` and ``spaces`` (a space
        is appended after a word when the matching ``spaces`` entry is
        truthy) and a doc is obtained from ``self.init_doc``. Relationships
        are searched with ``SimpleResolutionResolver`` first; only when that
        returns nothing is ``VicinityResolutionResolver`` tried.

        :param words: tokens of the sentence
        :param spaces: per-token flags, indexed like ``words``
        :param spans: forwarded unchanged to ``self.init_doc``
        :return: dict with the keys ``'relationships'``, ``'spans'`` and ``'text'``
        """
        pieces = []
        for index, word in enumerate(words):
            trailing = ' ' if spaces[index] else ''
            pieces.append(word + trailing)
        text = ''.join(pieces)

        doc = self.init_doc(words, spaces, spans)

        # Candidate entities: every destination token, and only those source
        # tokens explicitly flagged as linkable.
        destination_entities = [
            token for token in doc if token.ent_type_ == self.destination
        ]
        source_entities = [
            token for token in doc
            if token.ent_type_ == self.source and token._.linkable is True
        ]

        # Step 1: simple resolution.
        matches = SimpleResolutionResolver().find_relationships(
            destination_entities, source_entities)
        strategy = 'simple'

        # Step 2: vicinity matching, used only if step 1 found nothing.
        if len(matches) == 0:
            matches = VicinityResolutionResolver().find_relationships(
                doc, destination_entities, source_entities)
            strategy = 'vicinity'

        # A third, dependency-parsing based step is currently disabled.

        linked = []
        if len(matches) > 0:
            linked.extend(
                RuleBasedLinker.collect_relationships(matches, strategy))

        converted_spans = [
            span_to_dict(token) for token in doc
            if token.ent_type_ in entities_classes()
        ]

        return {
            'relationships': linked,
            'spans': converted_spans,
            'text': text,
        }