def __init__(self):
    """Set up an in-memory SQLite session, its gateways and the example range.

    ``example_range`` is ``range(min_range, max_range)``, i.e. it stops
    before ``max_range``.
    """
    lower, upper = 2, 3

    session = create_session('sqlite:///:memory:', debug=True)
    self.session = session
    # Both gateways share the same session.
    self.wg = gateways.WrapperGateway(session)
    self.eg = gateways.ExtractionGateway(session)

    self.min_range = lower
    self.max_range = upper
    self.example_range = range(lower, upper)
def setUp(self):
    """Prepare the fixture: a session, an IRController and sample queries."""
    util_factory = UtilFactory()
    self.session = create_session("sqlite:///:memory:", True)
    self.irc = IRController(util_factory)

    # Each query keeps its embedded double quotes as part of the string.
    first_query = '"We view these methods as tools which can be used"'
    second_query = '"believe that the information thus extracted"'
    third_query = '"can be used to extract useful information"'
    self.queries = [first_query, second_query, third_query]
def __init__(self):
    """Set up an in-memory SQLite session, gateways, range and field names.

    ``example_range`` is ``range(min_range, max_range)``, i.e. it stops
    before ``max_range``.
    """
    lower, upper = 2, 9

    session = create_session('sqlite:///:memory:', debug=True)
    self.session = session
    # Both gateways share the same session.
    self.wg = gateways.WrapperGateway(session)
    self.eg = gateways.ExtractionGateway(session)

    self.min_range = lower
    self.max_range = upper
    self.example_range = range(lower, upper)

    # NOTE(review): 'addres' looks like a misspelling of 'address' --
    # confirm against the field names used elsewhere before changing it.
    self.fields = [
        'addres',
        'author',
        'isbn',
        'issn',
        'journal',
        'number',
        'pages',
        'publisher',
        'title',
        'volume',
        'year',
    ]
def setUp(self):
    """Build the WrapperGateway fixture on an in-memory SQLite session."""
    memory_session = create_session(sql_uri='sqlite:///:memory:', debug=True)
    self.wm = WrapperGateway(memory_session)
def setUp(self):
    """Create an in-memory SQLite session and a WrapperGateway bound to it."""
    session = create_session('sqlite:///:memory:', True)
    self.session = session
    self.wg = WrapperGateway(session=session)
def __init__(self, session=None):
    """Store the session to work with, creating a default one if needed.

    Args:
        session: Session object to use. When ``None`` (the default), a new
            one is obtained from ``create_session()`` called without
            arguments.
    """
    # Compare against None explicitly: a truthiness test would discard a
    # caller-supplied session object that happens to evaluate as falsy.
    if session is None:
        session = create_session()
    self.session = session
def setUp(self):
    """Attach a WrapperGateway backed by an in-memory SQLite session."""
    session_options = {'sql_uri': 'sqlite:///:memory:', 'debug': True}
    self.wm = WrapperGateway(create_session(**session_options))
def run_library(self, library):
    """Run the extraction over one library and report per-field results.

    Opens the results file
    ``<base_path><library>/extraction-results-<nexamples>-corrected.csv``
    as ``self.file``, creates a SQLite session and its gateways, extracts
    one reference per entry of the library's ``filelist.txt`` and compares
    each extracted reference, field by field, with the control entry at the
    same position in ``extraction-results-control.bib``. All output goes
    through ``self.save_msg``.

    Args:
        library: Name of the library; used as a path component under
            ``self.base_path``.

    Raises:
        ValueError: If a non-blank line of ``filelist.txt`` does not
            contain a space separating the URL from the text file path.
    """
    self.file = open(''.join([self.base_path, library,
                              '/extraction-results-',
                              str(self.nexamples), '-corrected.csv']), 'w')
    # try/finally guarantees the results file is closed even when the
    # extraction or the comparison raises.
    try:
        self.session = create_session(
            ''.join(['sqlite:///', self.base_path, '/', library,
                     '/extraction-stats-', library, '-',
                     str(self.nexamples), '-corrected.db']),
            debug=True)
        self.wg = gateways.WrapperGateway(self.session)
        self.eg = gateways.ExtractionGateway(self.session)
        self.save_msg('Extraction results for library: %s' % library)

        references = self._extract_library_references(library)

        # Load control references
        control_file = ''.join([self.base_path, library,
                                '/extraction-results-control.bib'])
        control_entries = self.rec._parse_entries_file(control_file)

        # Control entries and extracted references are paired by position.
        for control_entry, extracted in zip(control_entries, references):
            if not extracted:
                continue
            self._save_reference_comparison(control_entry, extracted)
    finally:
        self.file.close()


def _extract_library_references(self, library):
    """Extract one reference per non-blank line of the library's filelist.

    Each line holds ``<html_url> <text_file_path>``. The returned list has
    one item per processed line: the first extracted reference, or ``None``
    when the extraction yielded nothing.
    """
    references = []
    files = open(''.join([self.base_path, library, '/', 'filelist.txt']),
                 'r')
    try:
        for line in files:
            line = line.strip()
            if not line:
                # A blank (e.g. trailing) line carries no entry.
                continue
            html_url, text_path = line.split(' ', 1)

            text_file = open(text_path, 'r')
            try:
                text = text_file.read()
            finally:
                text_file.close()

            top_results = [SearchResult('Some result', html_url)]
            print(html_url)
            # Only the references are used; the second item is ignored.
            refs, _ = self.iec.extract_reference(top_results, text)
            if refs:
                references.append(refs[0])
            else:
                references.append(None)
    finally:
        files.close()
    return references


def _save_reference_comparison(self, control_entry, extracted):
    """Compare one extracted reference with its control entry and log it.

    Every field of the control entry except ``url``, ``reference_type`` and
    ``reference_id`` is compared. The counters are local to this reference
    and are written out as two ';'-separated summary rows at the end.
    """
    self.save_msg(extracted.entry)
    self.save_msg('\n')

    total_control_fields = 0
    total_extracted_fields = 0
    correct = 0
    parcial = 0
    error = 0
    valid = 0
    invalid = 0

    for field in control_entry.fields:
        if field in ('url', 'reference_type', 'reference_id'):
            continue

        control_value = control_entry.get_field(field)
        total_control_fields += 1
        extracted_value = extracted.get_field(field)
        if not extracted_value:
            continue

        if extracted_value.valid:
            valid += 1
        else:
            invalid += 1

        control_value = control_value.value
        extracted_value = extracted_value.value

        if isinstance(control_value, list):
            # List values are only dumped for manual inspection; they are
            # not counted as extracted/correct/partial/incorrect.
            self.save_msg('Comparing field %s values:\n\t%s\n\t%s' %
                          (field, simplejson.dumps(control_value),
                           simplejson.dumps(extracted_value)))
            self.save_msg('\tCHECK MANUALLY')
            continue

        control_value = control_value.strip()
        extracted_value = extracted_value.strip()

        self.save_msg('Comparing field %s values:\n\t%s\n\t%s' %
                      (field, control_value, extracted_value))
        if control_value == extracted_value:
            correct += 1
            self.save_msg('\tCorrect')
        elif re.search(re.escape(control_value), extracted_value):
            # The control value appears literally inside the extracted one.
            parcial += 1
            self.save_msg('\tParcial')
        else:
            error += 1
            self.save_msg('\tIncorrect')
        total_extracted_fields += 1

    self.save_msg('')
    self.save_msg('Marked Valid;Marked invalid')
    self.save_msg('%d;%d' % (valid, invalid))
    self.save_msg('Total available;Total extracted;Incorrect;Parcial;Correct')
    self.save_msg('%d;%d;%d;%d;%d' % (total_control_fields,
                                      total_extracted_fields,
                                      error, parcial, correct))
    self.save_msg('\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n')