Esempio n. 1
0
 def __init__(self, etk):
     """Build the ``DecodingValueExtractor`` instances used by this module.

     Args:
         etk: ETK instance forwarded to ``ETKModule.__init__``.
     """
     ETKModule.__init__(self, etk)

     # Short local alias: every decoder below is constructed the same way,
     # from a code table on ``self`` and a human-readable extractor name.
     new_decoder = DecodingValueExtractor

     self.actor_type_decoder = new_decoder(self.actor_codes, 'Actor Code Decoder')
     self.known_group_decoder = new_decoder(self.known_group_codes, 'Known Groups Decoder')
     # NOTE(review): this decoder is built from ``known_group_codes``, the same
     # table as the known-groups decoder above -- confirm that a dedicated
     # ethnic-group table was not intended here.
     self.ethnic_group_decoder = new_decoder(self.known_group_codes, 'Ethnic Groups Decoder')
     self.religion_decoder = new_decoder(self.religion_codes, 'Religion Decoder')
     self.country_decoder = new_decoder(self.country_codes, 'Country Decoder')
Esempio n. 2
0
 def __init__(self, etk):
     """Create the date extractor and the CauseEx type decoder.

     Args:
         etk: ETK instance forwarded to ``ETKModule.__init__``.
     """
     ETKModule.__init__(self, etk)

     date_parser = DateExtractor(self.etk, 'gtd_date_parser')
     self.date_extractor = date_parser

     # presumably ``default_action="delete"`` discards values that have no
     # entry in the mapping -- verify against DecodingValueExtractor.
     causeex_type_decoder = DecodingValueExtractor(event_to_clauseex_class_mapping,
                                                   'CauseEx Type',
                                                   default_action="delete")
     self.causeex_decoder = causeex_type_decoder
Esempio n. 3
0
 def __init__(self, etk):
     """Create the document selector and the decoders for the coded fields.

     Args:
         etk: ETK instance forwarded to ``ETKModule.__init__``.
     """
     ETKModule.__init__(self, etk)
     self.doc_selector = DefaultDocumentSelector()

     # Short local alias to keep the decoder definitions compact.
     new_decoder = DecodingValueExtractor

     self.incomp_decoder = new_decoder(self.incomp_type, 'Incomp Decoder')
     self.int_decoder = new_decoder(self.int_event_type, 'Int Decoder')
     self.int_fatalities_decoder = new_decoder(self.int_fatalities, 'Int Fatalities Decoder')

     # NOTE(review): the lower-bound decoder keeps the extractor's default
     # ``default_action`` while the upper-bound one uses "delete" -- confirm
     # that this asymmetry is intended.
     self.int_fatalities_size_lower_decoder = new_decoder(
         self.int_fatalities_size_lower,
         'Int Fatalities Lower Bound Size Decoder',
     )
     self.int_fatalities_size_upper_decoder = new_decoder(
         self.int_fatalities_size_upper,
         'Int Fatalities Upper Bound Size Decoder',
         default_action="delete",
     )
     self.int_causeex_decoder = new_decoder(
         self.int_causeex_type,
         'Int CauseEx Type',
         default_action="delete",
     )
Esempio n. 4
0
 def __init__(self, etk):
     """Create the decoder built from ``weapons_to_clauseex_class_mapping``.

     Args:
         etk: ETK instance forwarded to ``ETKModule.__init__``.
     """
     ETKModule.__init__(self, etk)

     # presumably ``default_action='delete'`` discards values that have no
     # entry in the mapping -- verify against DecodingValueExtractor.
     causeex_weapon_decoder = DecodingValueExtractor(weapons_to_clauseex_class_mapping,
                                                     'Causeex Weapon Type',
                                                     default_action='delete')
     self.weapon_decoder = causeex_weapon_decoder
Esempio n. 5
0
    def test_dictionary_extractor(self) -> None:
        """Check ``DecodingValueExtractor`` under its key/value handling options.

        Four extractors are built over the same decoding table (default
        options, ``case_sensitive=True``, ``strip_key=False`` and
        ``strip_value=True``).  Each one is run over the same input values and
        the decoded value of every input that yields an extraction is
        collected, in input order, and compared with the expected lists.
        """
        decoding_dict = {
            'CA': 'California',
            'ny': 'New York',
            'AZ': ' Arizona',
            ' TX ': 'Texas',
            ' fl': 'Florida',
        }
        values = ['ca', 'CA', ' CA', ' ca', 'NY', ' ny', 'Az', 'AZ', 'az ', 'tx', 'tx ', 'TX', 'fl', 'FL', 'fl ']

        de_default = DecodingValueExtractor(decoding_dict, 'default_decoding') # strip_key and not case_sensitive
        de_case_sensitive = DecodingValueExtractor(decoding_dict, 'default_decoding', case_sensitive=True)
        de_not_strip_key = DecodingValueExtractor(decoding_dict, 'default_decoding', strip_key=False)
        de_strip_value = DecodingValueExtractor(decoding_dict, 'default_decoding', strip_value=True)

        def decoded_values(extractor) -> list:
            """Return the first decoded value of each input that produces an extraction."""
            decoded = []
            for raw in values:
                # Extract once per input and reuse the result for both the
                # emptiness check and the value lookup.
                extractions = extractor.extract(raw)
                if extractions:
                    decoded.append(extractions[0].value)
            return decoded

        results = [
            decoded_values(extractor)
            for extractor in (de_default, de_case_sensitive, de_not_strip_key, de_strip_value)
        ]

        expected = [
            [
                'California', 'California', 'California', 'California', 'New York', 'New York', ' Arizona',
                ' Arizona', ' Arizona', 'Texas', 'Texas', 'Texas', 'Florida', 'Florida', 'Florida'
            ],
            [
                'California', 'California', 'New York', ' Arizona', 'Texas', 'Florida', 'Florida'
            ],
            [
                'California', 'California', 'New York', ' Arizona', ' Arizona'
            ],
            [
                'California', 'California', 'California', 'California', 'New York', 'New York', 'Arizona',
                'Arizona', 'Arizona', 'Texas', 'Texas', 'Texas', 'Florida', 'Florida', 'Florida'
            ],
        ]

        # NOTE(review): the ``[:-1]`` slices leave the last case (strip_value)
        # out of the comparison, so that expectation is never checked --
        # confirm whether this exclusion is intentional.
        self.assertEqual(results[:-1], expected[:-1])
Esempio n. 6
0
 def __init__(self, etk):
     """Set up the date, glossary, CSV and interaction-code helpers.

     Args:
         etk: ETK instance forwarded to ``ETKModule.__init__`` and passed to
             the CSV processor.
     """
     ETKModule.__init__(self, etk)

     def glossary_extractor(glossary_path, extractor_name):
         # All glossary extractors share the default tokenizer and the same
         # matching options; only the glossary file and the name differ.
         glossary = self.etk.load_glossary(glossary_path, read_json=True)
         return GlossaryExtractor(glossary,
                                  extractor_name,
                                  self.etk.default_tokenizer,
                                  case_sensitive=False,
                                  ngrams=3)

     self.date_extractor = DateExtractor(self.etk, 'acled_date_parser')
     self.country_extractor = glossary_extractor(
         "${GLOSSARY_PATH}/countries.json.gz", "country_extractor")
     self.states_extractor = glossary_extractor(
         "${GLOSSARY_PATH}/states_usa_canada.json.gz", "states_extractor")
     self.cities_extractor = glossary_extractor(
         "${GLOSSARY_PATH}/cities.json.gz", "cities_extractor")
     self.csv_processor = CsvProcessor(etk=etk, heading_row=1)

     # Two-character interaction codes mapped to their descriptions.
     # NOTE(review): some codes in the numeric pattern (e.g. "67") have no
     # entry -- confirm against the data source whether that is intended.
     interaction_codes = {
         # Military
         "10": "Sole Military Action",
         "11": "Military Versus Military",
         "12": "Military Versus Rebels",
         "13": "Military Versus Political Militia",
         "14": "Military Versus Communal Militia",
         "15": "Military Versus Rioters",
         "16": "Military Versus Protesters",
         "17": "Military Versus Civilians",
         "18": "Military Versus Other",
         # Rebels
         "20": "Sole Rebel Action",
         "22": "Rebels Versus Rebels",
         "23": "Rebels Versus Political Militia",
         "24": "Rebels Versus Communal Militia",
         "25": "Rebels Versus Rioters",
         "26": "Rebels Versus Protesters",
         "27": "Rebels Versus Civilians",
         "28": "Rebels Versus Other",
         # Political militia
         "30": "Sole Political Militia Action",
         "33": "Political Militia Versus Political Militia",
         "34": "Political Militia Versus Communal Militia",
         "35": "Political Militia Versus Rioters",
         "36": "Political Militia Versus Protesters",
         "37": "Political Militia Versus Civilians",
         "38": "Political Militia Versus Other",
         # Communal militia
         "40": "Sole Communal Militia Action",
         "44": "Communal Militia Versus Communal Militia",
         "45": "Communal Militia Versus Rioters",
         "46": "Communal Militia Versus Protesters",
         "47": "Communal Militia Versus Civilians",
         "48": "Communal Militia Versus Other",
         # Rioters
         "50": "Sole Rioter Action",
         "55": "Rioters Versus Rioters",
         "56": "Rioters Versus Protesters",
         "57": "Rioters Versus Civilians",
         "58": "Rioters Versus Other",
         # Protesters
         "60": "Sole Protester Action",
         "66": "Protesters Versus Protesters",
         "68": "Protesters Versus Other",
         # Other actors
         "78": "Other Actor Versus Civilians",
         "80": "Sole Other Action"
     }
     self.interaction_decoding_dict = interaction_codes
     self.interaction_decoder = DecodingValueExtractor(self.interaction_decoding_dict,
                                                       'default_decoding',
                                                       case_sensitive=True)