Python preProcessContextData Exemples, DataProcessing.Util.preProcessContextData Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : LoadTrainingData.py Projet : gkeswani92/Word-Sense-Disambiguation

def getTrainingContextData():
    """Load the training XML and collect senses and context tokens per instance.

    The file at ``dir_path + training_file`` is parsed with
    ``initializeXMLParser``. Each child of the returned root is treated as
    one word type (keyed by its ``item`` attribute) and each grandchild as
    one instance (keyed by its ``id`` attribute).

    Returns:
        OrderedDict mapping word type -> ``{'training': {instance_id: entry}}``
        where ``entry`` is a dict with the keys ``"Sense"`` (list of
        ``senseid`` attributes of the ``answer`` children), ``"Pre-Context"``
        and ``"Post-Context"`` (the results of ``preProcessContextData`` on
        the whitespace-split text before and after the ``head`` element).
    """
    training_data = OrderedDict()

    # Parse the training set into an element tree.
    training_root = initializeXMLParser(dir_path + training_file)

    # One child element per word type.
    for word_type_xml in training_root:
        word_type = word_type_xml.attrib['item']
        training_data[word_type] = defaultdict(lambda: defaultdict(dict))

        # One child element per instance of this word type.
        for word_instance in word_type_xml:
            instance = word_instance.attrib['id']
            senses = [
                answer.attrib['senseid']
                for answer in word_instance.findall('answer')
            ]

            # Look the context element up once and reuse it.
            context = word_instance.find('context')

            # ``text``/``tail`` are None when the head word opens or closes
            # the context; treat that as "no tokens" instead of crashing.
            pre_context = (context.text or '').split()
            post_context = (context.find('head').tail or '').split()

            # Pre-processing the pre-context and post-context tokens.
            # TODO: Check why this is reducing the accuracy of the model by 1%
            pre_context = preProcessContextData(pre_context)
            post_context = preProcessContextData(post_context)

            training_data[word_type]['training'][instance] = {
                "Sense": senses,
                "Pre-Context": pre_context,
                "Post-Context": post_context,
            }

    return training_data

Exemple #2

0

Afficher le fichier

Fichier : Validation.py Projet : gkeswani92/Word-Sense-Disambiguation

def getTrainingContextData():
    """Load the training XML and hand each word type to ``createValidationData``.

    The file at ``dir_path + training_file`` is parsed with
    ``initializeXMLParser``. Each child of the returned root is treated as
    one word type (keyed by its ``item`` attribute) and each grandchild as
    one instance (keyed by its ``id`` attribute). Every instance is first
    stored under the ``'training'`` key; once a word type is complete, its
    mapping is replaced by whatever ``createValidationData`` returns for it.

    Returns:
        OrderedDict mapping word type -> the value returned by
        ``createValidationData`` for that word type's
        ``{'training': {instance_id: entry}}`` mapping. Each ``entry`` is a
        dict with the keys ``"Sense"``, ``"Pre-Context"`` and
        ``"Post-Context"``.
    """
    training_data = OrderedDict()

    # Parse the training set into an element tree.
    training_root = initializeXMLParser(dir_path + training_file)

    # One child element per word type.
    for word_type_xml in training_root:
        word_type = word_type_xml.attrib['item']
        training_data[word_type] = defaultdict(lambda: defaultdict(dict))

        # One child element per instance of this word type.
        for word_instance in word_type_xml:
            instance = word_instance.attrib['id']
            senses = [
                answer.attrib['senseid']
                for answer in word_instance.findall('answer')
            ]

            # Look the context element up once and reuse it.
            context = word_instance.find('context')

            # ``text``/``tail`` are None when the head word opens or closes
            # the context; treat that as "no tokens" instead of crashing.
            pre_context = (context.text or '').split()
            post_context = (context.find('head').tail or '').split()

            # Pre-processing the pre-context and post-context tokens.
            pre_context = preProcessContextData(pre_context)
            post_context = preProcessContextData(post_context)

            # Everything starts out as training data; the split happens below.
            training_data[word_type]['training'][instance] = {
                "Sense": senses,
                "Pre-Context": pre_context,
                "Post-Context": post_context,
            }

        # Split off validation data for this word type. Presumably a random
        # subset is moved out of 'training' -- confirm in createValidationData.
        training_data[word_type] = createValidationData(training_data[word_type])

    return training_data

Exemple #3

0

Afficher le fichier

Fichier : LoadTrainingData.py Projet : gkeswani92/Word-Sense-Disambiguation

def getTestContextData(test_data):
    """Add the context tokens of every test instance to ``test_data``.

    The file at ``dir_path + test_file`` is parsed with
    ``initializeXMLParser``. Each child of the returned root is treated as
    one word type (keyed by its ``item`` attribute) and each grandchild as
    one instance (keyed by its ``id`` attribute).

    Args:
        test_data: Mapping indexed as ``test_data[word_type]['test']`` that
            is updated in place. It must already support that two-level
            lookup for every word type in the test file (for example the
            nested-defaultdict structure built by the training loader --
            TODO confirm against the caller).

    Returns:
        The same ``test_data`` object, with
        ``test_data[word_type]['test'][instance_id]`` set to a dict holding
        the keys ``"Pre-Context"`` and ``"Post-Context"``.
    """
    # Parse the test set into an element tree.
    test_root = initializeXMLParser(dir_path + test_file)

    # One child element per word type.
    for word_type_xml in test_root:
        word_type = word_type_xml.attrib['item']

        # One child element per instance of this word type.
        for word_instance in word_type_xml:
            instance = word_instance.attrib['id']

            # Look the context element up once and reuse it.
            context = word_instance.find('context')

            # ``text``/``tail`` are None when the head word opens or closes
            # the context; treat that as "no tokens" instead of crashing.
            pre_context = (context.text or '').split()
            post_context = (context.find('head').tail or '').split()

            pre_context = preProcessContextData(pre_context)
            post_context = preProcessContextData(post_context)

            test_data[word_type]['test'][instance] = {
                "Pre-Context": pre_context,
                "Post-Context": post_context,
            }

    return test_data