def test_geolocate(self):
    """Check geolocate against the expected coordinate pairs.

    Calls ``preprocess.geolocate`` on the third element of every entry in
    ``self.tag_sentence`` and compares the collected results, in order,
    with a hard-coded list of (lat, lon) string pairs.
    """
    expected = [
        ('33.5101981468', '36.2912750244'),
        ('12.1666667', '6.25'),
        ('35', '38'),
        ('24.9056', '67.0822'),
    ]

    # '######' is passed as geolocate's second argument; it looks like a
    # redacted geonames username -- TODO confirm.
    located = []
    for entry in self.tag_sentence:
        located.append(preprocess.geolocate(entry[2], '######'))

    self.assertEqual(expected, located)
def run_preprocess(tagged_sent, event_dict, event_id, username=None,
                   locate=True, features=True):
    """
    Function to call the various preprocessing functions.

    Results are written into ``event_dict[event_id]`` in place under the keys
    'lat', 'lon' and 'number_involved'; nothing is returned. If ``event_dict``
    has no entry for ``event_id`` yet, an empty one is created first.

    Inputs
    ------

    tagged_sent : POS tagged sentence. List.

    event_dict : dictionary to which the event information should be added.

    event_id : ID for the event, which serves as the key in the dictionary.
                Should be generated from `sent_split`. String.

    username : geonames username. Only passed on when `locate` is true.
                String.

    locate : Whether to geolocate an event. When false, 'lat' and 'lon' are
                set to 'NA'. Boolean.

    features : Whether to extract features from an event. When false,
                'number_involved' is set to 'NA'. Boolean.

    """
    # Create the per-event entry on demand so an unseen ID does not raise
    # KeyError; an existing entry (and its other keys) is reused untouched.
    event_info = event_dict.setdefault(event_id, {})

    if locate:
        # Locate the event; geolocate's result is unpacked as (lat, lon).
        lat, lon = preprocess.geolocate(tagged_sent, username)
    else:
        lat = lon = 'NA'
    event_info['lat'] = lat
    event_info['lon'] = lon

    if features:
        # Number involved, as reported by preprocess.num_involved.
        event_info['number_involved'] = preprocess.num_involved(tagged_sent)
    else:
        event_info['number_involved'] = 'NA'