Beispiel #1
0
def run_preprocess(tagged_sent, event_dict, event_id, username=None,
                   locate=True, features=True):
    """
    Run the optional preprocessing steps for one event and store the results.

    The record ``event_dict[event_id]`` is updated in place with the keys
    'lat', 'lon' and 'number_involved'; nothing is returned. Any step that
    is switched off records the placeholder string 'NA' instead of a value.

    Inputs
    ------
    tagged_sent : POS tagged sentence. List. Passed unchanged to
    `preprocess.geolocate` and `preprocess.num_involved`.

    event_dict : dictionary holding the per-event records. The record stored
    under `event_id` is indexed directly, so it has to exist already.

    event_id : key of the event's record in `event_dict`. Should be
    generated from `sent_split`. String.

    username : geonames username, forwarded to `preprocess.geolocate`.
    String.

    locate : Whether to geolocate the event. Boolean.

    features : Whether to extract features from the event. Boolean.
    """
    # Coordinates: ask the geolocator, or fall back to the placeholder.
    if locate:
        latitude, longitude = preprocess.geolocate(tagged_sent, username)
    else:
        latitude = longitude = 'NA'
    event_dict[event_id]['lat'] = latitude
    event_dict[event_id]['lon'] = longitude

    # Number involved: extracted from the sentence, or the placeholder.
    involved = preprocess.num_involved(tagged_sent) if features else 'NA'
    event_dict[event_id]['number_involved'] = involved
Beispiel #2
0
 def test_number_involved(self):
     """
     Check `preprocess.num_involved` against the expected strings.

     The function is called on element 2 of every item in
     `self.tag_sentence` (presumably the POS tagged sentence -- the fixture
     is defined outside this method, confirm there) and the collected
     results must equal the reference list.
     """
     expected = ['28', '18', '28', '6']
     actual = []
     for item in self.tag_sentence:
         actual.append(preprocess.num_involved(item[2]))
     self.assertEqual(expected, actual)