Ejemplo n.º 1
0
 def __init__(self):
     """Set up the session, both gateways and the example range.

     The session is created from the ``sqlite:///:memory:`` URI with
     ``debug=True`` and is shared by the wrapper and extraction gateways.
     """
     session = create_session('sqlite:///:memory:', debug=True)
     self.session = session
     self.wg = gateways.WrapperGateway(session)
     self.eg = gateways.ExtractionGateway(session)

     # range() is half-open, so example_range covers min_range..max_range-1.
     self.min_range, self.max_range = 2, 3
     self.example_range = range(self.min_range, self.max_range)
    def __init__(self):
        """Set up the session, both gateways and the example range.

        The session is created from the ``sqlite:///:memory:`` URI with
        ``debug=True`` and is shared by the wrapper and extraction gateways.
        """
        session = create_session('sqlite:///:memory:', debug=True)
        self.session = session
        self.wg = gateways.WrapperGateway(session)
        self.eg = gateways.ExtractionGateway(session)

        # range() is half-open, so example_range covers min_range..max_range-1.
        self.min_range, self.max_range = 2, 3
        self.example_range = range(self.min_range, self.max_range)
 def setUp(self):
     """Build the fixture: a session, an IR controller and three phrase queries."""
     util_factory = UtilFactory()
     self.session = create_session("sqlite:///:memory:", True)
     self.irc = IRController(util_factory)

     # Every query carries its own embedded double quotes as part of the string.
     phrase_queries = [
         '"We view these methods as tools which can be used"',
         '"believe that the information thus extracted"',
         '"can be used to extract useful information"',
     ]
     self.queries = phrase_queries
 def __init__(self):
     """Set up the session, both gateways, the example range and the field list.

     The session is created from the ``sqlite:///:memory:`` URI with
     ``debug=True`` and is shared by the wrapper and extraction gateways.
     """
     session = create_session('sqlite:///:memory:', debug=True)
     self.session = session
     self.wg = gateways.WrapperGateway(session)
     self.eg = gateways.ExtractionGateway(session)

     # range() is half-open, so example_range covers min_range..max_range-1.
     self.min_range, self.max_range = 2, 9
     self.example_range = range(self.min_range, self.max_range)

     # NOTE(review): 'addres' looks like a typo for 'address' -- confirm
     # against whatever consumes self.fields before changing the string.
     self.fields = [
         'addres',
         'author',
         'isbn',
         'issn',
         'journal',
         'number',
         'pages',
         'publisher',
         'title',
         'volume',
         'year',
     ]
Ejemplo n.º 5
0
 def setUp(self):
     """Create the wrapper gateway under test on a fresh session."""
     # Session built from the 'sqlite:///:memory:' URI with debug enabled.
     session = create_session(sql_uri='sqlite:///:memory:', debug=True)
     self.wm = WrapperGateway(session)
Ejemplo n.º 6
0
 def setUp(self):
     """Create a session and a wrapper gateway bound to it."""
     # Second positional argument is passed through unchanged (True).
     session = create_session('sqlite:///:memory:', True)
     self.session = session
     self.wg = WrapperGateway(session=session)
Ejemplo n.º 7
0
 def __init__(self, session=None):
     """Store the session to work with, creating a default one if none is given.

     :param session: session object to use; when ``None`` (the default) a new
         one is obtained from ``create_session()`` called with no arguments.
     """
     # Compare against None explicitly: a truthiness test would also discard
     # a caller-supplied session object that happens to evaluate as falsy.
     if session is None:
         session = create_session()
     self.session = session
 def setUp(self):
     """Create the wrapper gateway under test on a fresh session."""
     # Session built from the 'sqlite:///:memory:' URI with debug enabled.
     session = create_session(sql_uri='sqlite:///:memory:', debug=True)
     self.wm = WrapperGateway(session)
Ejemplo n.º 9
0
    def run_library(self, library):
        """Extract references for one library and report them against the control set.

        Opens ``<base_path><library>/extraction-results-<nexamples>-corrected.csv``
        as ``self.file``, creates the SQLite-backed session and gateways for
        the library, extracts one reference per entry of the library's
        ``filelist.txt`` and compares each extracted reference, position by
        position, with the entries parsed from
        ``extraction-results-control.bib``. All report output goes through
        ``self.save_msg`` (presumably writing to ``self.file`` -- confirm).

        :param library: library name; used as a directory name under
            ``self.base_path`` and as part of the output file names.
        """
        results_path = ''.join([self.base_path, library, '/extraction-results-', str(self.nexamples), '-corrected.csv'])
        self.file = open(results_path, 'w')
        # try/finally guarantees the report file is closed even when the
        # extraction or the comparison raises part-way through.
        try:
            db_uri = ''.join(['sqlite:///', self.base_path, '/', library, '/extraction-stats-', library, '-', str(self.nexamples), '-corrected.db'])
            self.session = create_session(db_uri, debug=True)
            self.wg = gateways.WrapperGateway(self.session)
            self.eg = gateways.ExtractionGateway(self.session)

            self.save_msg('Extraction results for library: %s' % library)

            references = self._extract_library_references(library)

            # Load control references
            control_file = ''.join([self.base_path, library, '/extraction-results-control.bib'])
            control_references = self.rec._parse_entries_file(control_file)

            # zip() pairs entries by position and stops at the shorter list.
            for control, extracted in zip(control_references, references):
                if not extracted:
                    # Nothing was extracted for this entry; it is not reported.
                    continue
                self._report_reference_comparison(control, extracted)
        finally:
            self.file.close()

    def _extract_library_references(self, library):
        """Return one extracted reference (or None) per entry of the library's filelist.txt.

        Each non-blank line of the file list is ``<html_url> <text_file_path>``,
        split on the first space.
        """
        filelist_path = ''.join([self.base_path, library, '/', 'filelist.txt'])
        with open(filelist_path, 'r') as filelist:
            entries = [line.strip() for line in filelist]

        references = []
        for entry in entries:
            if not entry:
                # Blank lines (e.g. a trailing newline) carry no entry and
                # would otherwise break the two-way split below.
                continue
            html_url, text_path = entry.split(' ', 1)

            with open(text_path, 'r') as text_file:
                text = text_file.read()

            top_results = [SearchResult('Some result', html_url)]
            print(html_url)

            # Only the first extracted reference is kept; the second element
            # of the returned pair is not used here.
            refs, _result = self.iec.extract_reference(top_results, text)
            references.append(refs[0] if refs else None)
        return references

    def _report_reference_comparison(self, control, extracted):
        """Log a field-by-field comparison of one extracted reference with its control."""
        self.save_msg(extracted.entry)
        self.save_msg('\n')

        total_control_fields = 0
        total_extracted_fields = 0

        correct = 0
        parcial = 0
        error = 0

        valid = 0
        invalid = 0

        for field in control.fields:
            if field in ('url', 'reference_type', 'reference_id'):
                continue

            control_field = control.get_field(field)
            total_control_fields += 1

            extracted_field = extracted.get_field(field)
            if not extracted_field:
                continue

            if extracted_field.valid:
                valid += 1
            else:
                invalid += 1

            control_value = control_field.value
            extracted_value = extracted_field.value

            if isinstance(control_value, list):
                # List values are only dumped for manual inspection; they are
                # counted in valid/invalid but in none of the match totals.
                self.save_msg('Comparing field %s values:\n\t%s\n\t%s' % (field, simplejson.dumps(control_value), simplejson.dumps(extracted_value)))
                self.save_msg('\tCHECK MANUALLY')
                continue

            control_value = control_value.strip()
            extracted_value = extracted_value.strip()

            self.save_msg('Comparing field %s values:\n\t%s\n\t%s' % (field, control_value, extracted_value))

            if control_value == extracted_value:
                correct += 1
                self.save_msg('\tCorrect')
            elif re.search(re.escape(control_value), extracted_value):
                # The control text appears verbatim inside the extracted text.
                parcial += 1
                self.save_msg('\tParcial')
            else:
                error += 1
                self.save_msg('\tIncorrect')

            total_extracted_fields += 1

        self.save_msg('')
        self.save_msg('Marked Valid;Marked invalid')
        self.save_msg('%d;%d' % (valid, invalid))

        self.save_msg('Total available;Total extracted;Incorrect;Parcial;Correct')
        self.save_msg('%d;%d;%d;%d;%d' % (total_control_fields, total_extracted_fields, error, parcial, correct))

        self.save_msg('\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n')
Ejemplo n.º 10
0
 def setUp(self):
     """Create a session and a wrapper gateway bound to it."""
     # Second positional argument is passed through unchanged (True).
     session = create_session('sqlite:///:memory:', True)
     self.session = session
     self.wg = WrapperGateway(session=session)