コード例 #1
0
    def run(self):
        """
        Creates a pool of worker threads, starts them, waits for the input
        queue to be empty and then joins every worker.

        Calls '_set_pool_size()' and then builds 'pool_size' instances of
        'thread_class', each one receiving the input queue, the output queue
        and the keyword arguments stored in '_thread_args'.
        Results, if any, will be available in the output queue. 'finished'
        is set to True once every worker has been joined.
        """
        # Imported here so the method does not depend on a module-level
        # import of 'time'.
        import time

        self._set_pool_size()

        log.debug('Active threads: %d' % threading.active_count())  #@UndefinedVariable

        # Create threads and add them to the pool
        for i in range(self.pool_size):
            thread = self.thread_class(self.in_queue, self.out_queue,
                                       **self._thread_args)
            thread.name = 'Worker-%02d' % i
            self._thread_pool.append(thread)
            thread.start()

        log.debug('Active threads: %d' % threading.active_count())  #@UndefinedVariable

        # Wait for the threads to take everything out of the queue. Sleep
        # between checks instead of spinning in a tight loop, which would
        # keep a CPU busy while the workers are trying to run.
        while not self.in_queue.empty():
            time.sleep(0.05)

        # Wait for the workers to end.
        # NOTE(review): presumably 'thread_class.join' also asks the worker
        # to stop; a plain join() would only wait -- confirm.
        for thread in self._thread_pool:
            thread.join()
        self.finished = True
コード例 #2
0
 def run(self):
     """
     Runs until the stop event is set.

     Takes files from the input queue and hands each one to a
     'ReferenceMaker' object. The value returned by 'make_reference' is
     put in the output queue. If the extraction raises, the error is
     logged and an empty 'Extraction' is put in the output queue instead.
     Falsy items taken from the queue are ignored.
     """
     log.debug("Running thread", extra={'threadname':self.getName()}) #@UndefinedVariable
     while not self.stop_event.isSet():
         # Block for a short while instead of polling an empty queue in a
         # tight loop. The timeout bounds how long it takes to notice
         # that the stop event has been set.
         try:
             file_path = self.in_queue.get(True, 0.1)
         except Queue.Empty:
             continue
         if not file_path:
             continue

         log.debug("Processing file %s" % file_path) #@UndefinedVariable
         try:
             reference = ReferenceMaker().make_reference(file_path,
                                                         self.target_format)
             self.out_queue.put(reference)
         except Exception as e:
             # Keep the worker alive and still report a result for the file
             log.error('Unexpected exception while extracting reference' #@UndefinedVariable
                       ' for file %s: %s' % (file_path, str(e)))
             self.out_queue.put(Extraction())
コード例 #3
0
 def run(self):
     """
     Runs until the stop event is set.

     Takes files from the input queue and hands each one to a
     'ReferenceMaker' object. The value returned by 'make_reference' is
     put in the output queue. If the extraction raises, the error is
     logged and an empty 'Extraction' is put in the output queue instead.
     Falsy items taken from the queue are ignored.
     """
     log.debug("Running thread",
               extra={'threadname': self.getName()})  #@UndefinedVariable
     while not self.stop_event.isSet():
         # Wait on the queue with a timeout rather than spinning while it
         # is empty. The stop event is checked again after every timeout.
         try:
             file_path = self.in_queue.get(True, 0.1)
         except Queue.Empty:
             continue
         if not file_path:
             continue

         log.debug("Processing file %s" % file_path)  #@UndefinedVariable
         try:
             reference = ReferenceMaker().make_reference(
                 file_path, self.target_format)
             self.out_queue.put(reference)
         except Exception as e:
             # Keep the worker alive and still report a result for the file
             log.error(
                 'Unexpected exception while extracting reference'  #@UndefinedVariable
                 ' for file %s: %s' % (file_path, str(e)))
             self.out_queue.put(Extraction())
コード例 #4
0
    def _create_new_collection(self):
        """
        Shows a 'WrapperCollectionBox' dialog and, unless it is rejected,
        creates a wrapper collection with the url and field name typed in
        it. The new collection is added to the collections tree, its parent
        item is expanded and the new item is selected.
        """
        dialog = WrapperCollectionBox(self)
        if dialog.exec_() == QtGui.QDialog.Rejected:
            log.debug('Collection creation aborted')  #@UndefinedVariable
            return

        log.debug('Creating new collection %s %s' %
                  (dialog.ui.urlLine.text(),
                   dialog.ui.fieldLine.text()))  #@UndefinedVariable

        url_text = unicode(dialog.ui.urlLine.text())
        field_text = unicode(dialog.ui.fieldLine.text())

        new_collection = self.parent.wrapper_gw.new_wrapper_collection()
        new_collection.url = url_text
        new_collection.field = field_text

        tree = self.ui.collections
        new_item = self._add_collection(new_collection)
        tree.setItemExpanded(new_item.parent(), True)
        tree.setItemSelected(new_item, True)
コード例 #5
0
    def _rule_example(self, example):
        """
        Creates PathRules for an example whose value is a collection.

        Only the first two values are looked at. Each one is ruled with the
        parent class' '_rule_example'. With no values an empty list is
        returned; with a single value, one PathRule per resulting rule.
        Otherwise, every rule of the first value yields one PathRule: its
        own pattern if the rule also appears for the second value or if no
        rule of the second value should be merged with it, and the merged
        pattern of the last mergeable rule otherwise.
        """
        log.debug('Ruling example with MultiValuePathRuler') #@UndefinedVariable
        rule_single_value = super(MultiValuePathRuler, self)._rule_example
        values = list(example.value)
        if not values:
            return []

        first_rules = rule_single_value(Example(values[0], example.content))
        if len(values) == 1:
            return [PathRule(rule.pattern) for rule in first_rules]

        second_rules = rule_single_value(Example(values[1], example.content))
        result = []
        for first in first_rules:
            pattern = list(first.pattern)
            if first not in second_rules:
                # Each merge starts from the original pattern, so only the
                # last rule that should be merged has an effect
                for second in second_rules:
                    if self._should_merge(first, second):
                        pattern = self._merge_patterns(first.pattern,
                                                       second.pattern)
            result.append(PathRule(pattern))

        return result
コード例 #6
0
    def run(self):
        """
        Creates a pool of worker threads, starts them, waits for the input
        queue to be empty and then joins every worker.

        Calls '_set_pool_size()' and then builds 'pool_size' instances of
        'thread_class', each one receiving the input queue, the output queue
        and the keyword arguments stored in '_thread_args'.
        Results, if any, will be available in the output queue. 'finished'
        is set to True once every worker has been joined.
        """
        # Imported here so the method does not depend on a module-level
        # import of 'time'.
        import time

        self._set_pool_size()

        log.debug('Active threads: %d' % threading.active_count()) #@UndefinedVariable

        # Create threads and add them to the pool
        for i in range(self.pool_size):
            thread = self.thread_class(self.in_queue, self.out_queue,
                                       **self._thread_args)
            thread.name = 'Worker-%02d' % i
            self._thread_pool.append(thread)
            thread.start()

        log.debug('Active threads: %d' % threading.active_count()) #@UndefinedVariable

        # Wait for the threads to take everything out of the queue. A short
        # sleep between checks avoids a busy loop that would keep a CPU
        # occupied while the workers are trying to run.
        while not self.in_queue.empty():
            time.sleep(0.05)

        # Wait for the workers to end.
        # NOTE(review): presumably 'thread_class.join' also asks the worker
        # to stop; a plain join() would only wait -- confirm.
        for thread in self._thread_pool:
            thread.join()
        self.finished = True
コード例 #7
0
 def _update_fields(self, reference):
     """
     Updates the fields of a reference from the rows of the fields tree.

     Rows are matched to 'reference.fields' by position. A row with any
     empty column removes the field stored at its position. Rows whose
     third column is neither 'True' nor 'False' are skipped. Any other
     row overwrites the field at its position or, when there is no field
     at that position, is added with 'reference.add_field'.
     """
     log.debug('Updating reference') #@UndefinedVariable
     for index in range(self.editor.fields.topLevelItemCount()):
         item = self.editor.fields.topLevelItem(index)

         # Remove empty items. The row is done after that: without the
         # 'continue' an incomplete row with a valid status would go on
         # and overwrite the field that has just moved into this position.
         # NOTE(review): popping shifts the following fields one position
         # down while the row index keeps growing -- confirm that the
         # positional matching of later rows is still the intended one.
         if ((len(reference.fields) > index) and
             not (item.text(0) and item.text(1) and (item.text(2)))):
             reference.fields.pop(index)
             continue

         # Skip non-empty items that have an invalid status
         status = item.text(2)
         if not (status == 'True' or status == 'False'):
             continue

         log.debug('Index: %d Number of fields %d' % (index , len(reference.fields))) #@UndefinedVariable

         try:
             name = unicode(item.text(0))
             value = unicode(item.text(1))
             valid = str(item.text(2)) == "True"
         except TypeError as e:
             log.error('Type error when casting to store to database %s' % str(e)) #@UndefinedVariable
             continue

         if len(reference.fields) > index:
             field = reference.fields[index]
             field.name = name
             field.value = value
             field.valid = valid
         else:
             reference.add_field(name, value, valid)
コード例 #8
0
 def _update_people(self, tree, people, add_method):
     """
     Updates a list of people (authors or editors) of a reference from
     the rows of a tree widget. Which list gets updated depends on the
     'people' and 'add_method' parameters.

     Rows are matched to 'people' by position. A row with its three
     columns empty removes the person at its position, if there is one.
     Any other row overwrites the person at its position or, when there
     is none, is passed to 'add_method' as (first, middle, last) name.
     """
     for row in range(tree.topLevelItemCount()):
         entry = tree.topLevelItem(row)

         is_blank = (not entry.text(0) and not entry.text(1)
                     and not entry.text(2))
         if is_blank:
             if row < len(people):
                 people.pop(row)
             continue

         log.debug("Index: %d Number of fields %d" % (row , len(people))) #@UndefinedVariable

         try:
             first_name = unicode(entry.text(0))
             middle_name = unicode(entry.text(1))
             last_name = unicode(entry.text(2))
         except TypeError as e:
             log.error("Type error when casting to store to database %s" % str(e)) #@UndefinedVariable
             continue

         if row >= len(people):
             add_method(first_name, middle_name, last_name)
             continue

         # NOTE(review): 'name', 'value' and 'valid' look like they were
         # copied from the field update code; the person objects probably
         # expect first/middle/last name attributes -- confirm.
         person = people[row]
         person.name = first_name
         person.value = middle_name
         person.valid = last_name
コード例 #9
0
    def populate(self, extraction):
        """
        Fills the editor widgets with the contents of an extraction.

        Does nothing when 'extraction' is falsy. Otherwise the editor is
        cleared, the extraction is stored in 'self.extraction' and its file
        path, query string and result url are shown. If the extraction has
        references, the validity, fields, authors and editors of the first
        one are shown too.

        The widgets are filled between 'enter_populating()' and
        'exit_populating()'; the latter is called even if filling fails.
        """
        if not extraction:
            return

        self.enter_populating()
        try:
            self.clear()
            self.extraction = extraction
            log.debug("Loading extraction for: %s" % extraction.file_path) #@UndefinedVariable

            self.editor.filePathLine.setText(extraction.file_path)
            self.editor.queryLine.setText(extraction.query_string)
            self.editor.resultLine.setText(extraction.result_url)

            if not extraction.references:
                return

            # Only the first reference of the extraction is displayed
            reference = extraction.references[0]
            self.editor.validitySpin.setValue(reference.validity)

            # Add fields
            for field in reference.fields:
                item = QtGui.QTreeWidgetItem(self.editor.fields)
                item.setFlags(QtCore.Qt.ItemIsEditable |
                              QtCore.Qt.ItemIsEnabled)
                item.setText(0, field.name)
                item.setText(1, field.value)
                item.setText(2, repr(field.valid))

            self.populate_people(self.editor.authors, reference.authors)
            self.populate_people(self.editor.editors, reference.editors)
        finally:
            # Always leave the populating state, otherwise an error above
            # would leave the editor flagged as populating
            self.exit_populating()
コード例 #10
0
 def find_reference_by_id(self, id):
     """
     Returns a 'Reference' built from the mapped reference that has the
     given id, or None if the query yields no result.

     The fields of the mapped reference are copied one by one. Authors
     and editors, if any, are stored as lists of name dictionaries in
     the 'author' and 'editor' fields.

     Raises ValueError if 'id' is falsy.
     """
     if not id:
         raise ValueError

     log.debug('Querying the database. Reference with id %s' % str(id)) #@UndefinedVariable
     # first() gives None when nothing matches, which is what the check
     # below expects; one() raises instead of returning None.
     # NOTE(review): assumes 'self.session' is a SQLAlchemy session --
     # confirm.
     m_reference = (self.session.query(mappers.Reference).
                    filter(mappers.Reference.id == id).first())

     if not m_reference:
         return None

     log.debug('Creating new reference') #@UndefinedVariable
     reference = Reference()
     reference.id = m_reference.id
     reference.validity = m_reference.validity

     log.debug('Adding fields') #@UndefinedVariable
     for m_field in m_reference.fields:
         reference.set_field(m_field.name, m_field.value, m_field.valid)

     log.debug('Adding authors') #@UndefinedVariable
     authors = [m_author.to_name_dict()
                for m_author in m_reference.authors]
     if authors:
         reference.set_field(u'author', authors, True)

     log.debug('Adding editors') #@UndefinedVariable
     editors = [m_editor.to_name_dict()
                for m_editor in m_reference.editors]
     if editors:
         reference.set_field(u'editor', editors, True)

     return reference
コード例 #11
0
 def _evaluate_wrapper(self, wrapper, examples):
     """
     Applies the wrapper to every example and votes it once per example:
     an upvote when the evaluation of the extracted information is
     truthy, a downvote otherwise. Extracting nothing counts as a failed
     evaluation.
     """
     for example in examples:
         log.debug('Evaluating wrapper with example value %s ' % #@UndefinedVariable
                   str(example.value))
         extracted = wrapper.extract_info(example.content)

         if extracted:
             # Lists are evaluated as collections, anything else as a
             # single value
             if type(extracted) is list:
                 evaluate = self._evaluate_multi_value_wrapper
             else:
                 evaluate = self._evaluate_single_value_wrapper
             passed = evaluate(extracted, example.value)
         else:
             passed = False

         if passed:
             wrapper.upvotes += 1
         else:
             wrapper.downvotes += 1
         log.debug('Result of evaluation: %s' % str(passed)) #@UndefinedVariable
コード例 #12
0
 def _mark_for_update(self):
     """
     Flags the current reference as pending update, unless the editor is
     being populated.
     """
     if self.populating:
         return
     self.for_update = True
     log.debug("Reference marked for update") #@UndefinedVariable
コード例 #13
0
 def _change_show_string(self, new):
     """
     Sets the show string of the last selected item, if there is one, to
     the value that '_get_show_string' returns for 'new'. Both the
     'show_path' attribute and the text of the first column are updated.
     """
     log.debug('Changing show string for current item') #@UndefinedVariable

     if not self.last_selected:
         return

     text = self._get_show_string(str(new))
     current = self.last_selected
     current.show_path = text
     current.setText(0, text)
コード例 #14
0
    def initializePage(self):
        """
        Adds every extraction returned by the extraction gateway to the
        page and sorts the references widget by its first column in
        ascending order.
        """
        log.debug("Initializing references page.")  #@UndefinedVariable

        gateway = self.parent.extraction_gw
        for found in gateway.find_extractions():
            self._add_extraction(found)

        self.ui.references.sortItems(0, QtCore.Qt.AscendingOrder)
コード例 #15
0
 def _update_rules(self, wrapper):
     """
     Updates the rules of a wrapper from the rows of the rules tree.

     The last top-level row of the tree is not processed. Rows are
     matched to 'wrapper.rules' by position. A row with any empty column
     removes the rule stored at its position. Rows whose order is not an
     integer or whose pattern cannot be parsed as JSON are skipped. Any
     other row overwrites the rule at its position or, when there is
     none, is added with 'wrapper.add_rule_by_info'.
     """
     rules_tree = self.ui.rules
     for row in range(rules_tree.topLevelItemCount() - 1):
         entry = rules_tree.topLevelItem(row)

         log.debug('Updating rule %d' % row) #@UndefinedVariable

         # Remove empty items
         if row < len(wrapper.rules):
             if not (entry.text(0) and entry.text(1) and entry.text(2)):
                 wrapper.rules.pop(row)
                 continue

         # Skip items whose values cannot be converted
         try:
             rule_type = str(entry.text(0))
             pattern = str(entry.text(1))
             order = int(str(entry.text(2)))
         except (TypeError, ValueError):
             log.error('Error when casting to store to database') #@UndefinedVariable
             continue

         # Check that the pattern can be converted to a python object.
         # Only the check matters; the parsed value is discarded.
         try:
             simplejson.loads(pattern)
         except ValueError:
             log.debug('Cannot convert pattern %s to Python objects' % pattern) #@UndefinedVariable
             continue

         # Update or append the rules
         if row < len(wrapper.rules):
             rule = wrapper.rules[row]
             rule.rule_type = rule_type
             rule.pattern = pattern
             rule.order = order
         else:
             wrapper.add_rule_by_info(rule_type, pattern, order)
コード例 #16
0
 def apply(self, input):
     """
     Applies the rule to the input.

     The first two items of the pattern are stored in 'value_guide' and
     'context'. The remaining items are passed together with the input
     to '_get_path_element', and '_choose_element' selects the result
     among the elements it returns.
     """
     log.debug('Applying PathRule') #@UndefinedVariable
     steps = list(self.pattern)
     self.value_guide = steps[0]
     self.context = steps[1]
     candidates = self._get_path_element(steps[2:], input)
     return self._choose_element(candidates)
コード例 #17
0
 def _mark_wrapper_for_update(self):
     """
     Flags the wrapper as pending update after it has been changed.
     Changes made while the fields are being populated are ignored.
     """
     if self.populating:
         return
     self.wrapper_for_update = True
     log.debug('Marked wrapper for update') #@UndefinedVariable
コード例 #18
0
 def _mark_wrapper_for_update(self):
     """
     Flags the wrapper as pending update after it has been changed.
     Changes made while the fields are being populated are ignored.
     """
     if self.populating:
         return
     self.wrapper_for_update = True
     log.debug('Marked wrapper for update')  #@UndefinedVariable
コード例 #19
0
 def run(self):
     """
     Reports the extraction progress until the maximum is reached.

     While the last reported count is below 'maximum', emits the
     'output(int)' signal with the number of items in
     'index_maker.processed' and sleeps for half a second. Then calls
     'exit(0)'.
     """
     log.debug("Reference extraction thread running") #@UndefinedVariable
     progress_signal = QtCore.SIGNAL("output(int)")
     processed_count = 0
     while processed_count < self.maximum:
         processed_count = len(self.index_maker.processed)
         self.emit(progress_signal, processed_count)
         time.sleep(0.5)
     log.debug("Exiting extraction thread") #@UndefinedVariable
     self.exit(0)
コード例 #20
0
 def _update_export_edit(self):
     """
     Shows the references formatted by the thread in the entries edit.
     Every reference is followed by an empty line.
     """
     log.debug('Finished formatting: %d formatted references' % #@UndefinedVariable
               len(self.thread.formatted_references))
     formatted_references = self.thread.formatted_references

     # Collect the pieces and join them once, instead of rebuilding the
     # whole text for every reference
     parts = []
     for reference in formatted_references:
         parts.extend((reference, '\n\n'))

     self.ui.entriesEdit.setText(''.join(parts))
コード例 #21
0
 def _populate_collections(self):
     """
     Adds all the collections returned by the wrapper gateway to the
     collections list, grouped by url.

     The collections are added between 'enter_populating()' and
     'exit_populating()'; the latter is called even if adding fails.
     """
     log.debug("Populating collections list")  #@UndefinedVariable
     self.enter_populating()
     try:
         for collection in self.parent.wrapper_gw.find_wrapper_collections():
             self._add_collection(collection)
     finally:
         # Never stay in the populating state after an error
         self.exit_populating()
コード例 #22
0
 def _populate_collections(self):
     """
     Adds all the collections returned by the wrapper gateway to the
     collections list, grouped by url.

     The collections are added between 'enter_populating()' and
     'exit_populating()'; the latter is called even if adding fails.
     """
     log.debug("Populating collections list") #@UndefinedVariable
     self.enter_populating()
     try:
         for collection in self.parent.wrapper_gw.find_wrapper_collections():
             self._add_collection(collection)
     finally:
         # Never stay in the populating state after an error
         self.exit_populating()
コード例 #23
0
 def extract_info(self, source, page):
     """
     Extracts a reference from the given page.

     The work is delegated to the method named '_do_' plus the wrapper
     name registered for 'source' in '_available_wrappers'. Returns
     (None, None) when no wrapper is registered for the source.
     """
     if source in self._available_wrappers.keys():
         method_name = '_do_' + self._available_wrappers[source]
         return getattr(self, method_name)(source, page)

     log.debug('No reference wrapper available for source %s' % source) #@UndefinedVariable
     return (None, None)
コード例 #24
0
 def _create_new_reference(self):
     """
     Creates a new extraction through the extraction gateway, adds it to
     the references list and makes its item the selected one.
     """
     extraction = self.parent.extraction_gw.new_extraction()
     item = self._add_extraction(extraction)

     log.debug('Changing selection to the new item') #@UndefinedVariable
     # Unselecting the previous item is best effort. Only regular errors
     # are ignored; a bare 'except' would also swallow KeyboardInterrupt
     # and SystemExit.
     try:
         self.ui.references.setItemSelected(self.last_selected, False)
     except Exception:
         log.debug('Error unselecting extraction') #@UndefinedVariable
     self.ui.references.setItemSelected(item, True)
コード例 #25
0
    def initializePage(self):
        """
        Starts importing references from the file selected in the
        'filePath' field.

        Adds the GUI handler to the log, creates a
        'ReferenceImporterThread' for the path, connects its 'finished()'
        and 'terminated()' signals to 'self.finish' and starts it.
        """
        log.addHandler(self.guihandler)  #@UndefinedVariable
        path = self.field('filePath').toPyObject()
        log.debug("Starting importing references from: %s" %
                  path)  #@UndefinedVariable

        importer = ReferenceImporterThread(str(path), self)
        self.thread = importer
        # Both ways of ending the thread lead to the same slot
        for signal_name in ("finished()", "terminated()"):
            self.connect(importer, QtCore.SIGNAL(signal_name), self.finish)
        importer.start()
コード例 #26
0
    def _apply_single_input(self, input):
        """
        Searches the stripped input for the regular expression stored in
        'self.pattern'.

        If compiling the pattern, stripping the input or searching raises,
        the error is logged and an empty string is returned.

        NOTE(review): the visible code ends right after the error handling
        and never uses 'matches', so the successful path returns None here.
        The method looks cut short -- confirm against the full source.
        """
        log.debug('Applying RegexRule with pattern %s' % self.pattern) #@UndefinedVariable

        try:
            regex = re.compile(self.pattern)
            # Leading and trailing whitespace is not part of the search
            input = input.strip()
            matches = re.search(regex, input)
        except Exception, e:
            log.error('Exception applying RegexRule with pattern %s: %s'  #@UndefinedVariable
                      % (self.pattern, e))
            return ''
コード例 #27
0
 def _prune_wrappers(self, wrappers):
     """
     Returns the wrappers worth keeping, in their original order.

     A wrapper is kept if its score is above half of 'min_validity' or
     if its position is within the first 'max_wrappers' wrappers.
     """
     log.debug('Prunning %d wrappers.' % len(wrappers)) #@UndefinedVariable
     limit = self.max_wrappers
     kept = [wrapper
             for position, wrapper in enumerate(wrappers, 1)
             if (wrapper.score > self.min_validity / 2.0
                 or position <= limit)]
     log.debug('After prunning: %d wrappers' % len(kept)) #@UndefinedVariable
     return kept
コード例 #28
0
 def done(self, status):
     """
     Asks for a '.bib' file name and saves the text of the entries edit
     to it, encoded as UTF-8.

     Does nothing if no path is chosen. Errors while saving are logged
     and not raised. 'status' is not used.
     """
     path = QtGui.QFileDialog.getSaveFileName(self,
         caption='Save references to file', filter='BibTeX (*.bib)')
     if not path:
         return
     log.debug('Saving to file: %s' % path) #@UndefinedVariable
     try:
         text = unicode(self.page01.ui.entriesEdit.toPlainText())
         # 'with' closes the file even if writing fails
         with open(path, 'w') as output:
             # Encode explicitly: writing a unicode object to a byte file
             # falls back to the default codec, which fails on non-ASCII
             # characters in Python 2.
             output.write(text.encode('utf-8'))
     except Exception as e:
         log.error('Error saving references to %s' % e) #@UndefinedVariable
コード例 #29
0
    def _update_wrapper(self):
        """
        Updates the score and the rules of the last selected wrapper.
        Nothing is done unless a wrapper is selected and it has been
        marked for update.
        """
        if not self.wrapper_for_update or not self.last_selected_wrapper:
            return
        log.debug('Updating last selected wrapper') #@UndefinedVariable

        selected = self.last_selected_wrapper.wrapper
        self._update_score(selected)
        self._update_rules(selected)
コード例 #30
0
ファイル: types.py プロジェクト: rxuriguera/bibtexIndexMaker
 def extract_info(self, input):
     """
     Applies the chain of rules to extract the piece of information.

     Each rule receives the result of the previous one, starting with
     'input'. The chain stops as soon as a result is falsy, and the last
     result obtained is returned.
     """
     log.debug('Applying ruled wrapper') #@UndefinedVariable
     current = input
     for rule in self.rules:
         if not current:
             break
         current = rule.apply(current)
     return current
コード例 #31
0
 def _prune_wrappers(self, wrappers):
     """
     Returns the wrappers worth keeping, in their original order.

     A wrapper is kept if its score is above half of 'min_validity' or
     if its position is within the first 'max_wrappers' wrappers.
     """
     log.debug('Prunning %d wrappers.' % len(wrappers))  #@UndefinedVariable
     limit = self.max_wrappers
     kept = [wrapper
             for position, wrapper in enumerate(wrappers, 1)
             if (wrapper.score > self.min_validity / 2.0
                 or position <= limit)]
     log.debug('After prunning: %d wrappers' %
               len(kept))  #@UndefinedVariable
     return kept
コード例 #32
0
 def _rule_example(self, example):
     """
     Creates the rules for an example.

     Every element returned by '_get_content_elements' for the value and
     content of the example is ruled with '_rule_element'. The truthy
     rules are merged into a new list with '_merge_rules', and that list
     is returned.
     """
     log.debug('Ruling example with PathRuler. Value %s' % #@UndefinedVariable
               str(example.value))
     candidates = []
     elements = self._get_content_elements(example.value, example.content)
     for element in elements:
         candidate = self._rule_element(example, element)
         if not candidate:
             continue
         candidates.append(candidate)

     merged = []
     self._merge_rules(merged, candidates)
     return merged
コード例 #33
0
 def initializePage(self):
     """
     Starts importing references from the file selected in the
     'filePath' field.

     Adds the GUI handler to the log, creates a 'ReferenceImporterThread'
     for the path, connects its 'finished()' and 'terminated()' signals
     to 'self.finish' and starts it.
     """
     log.addHandler(self.guihandler) #@UndefinedVariable
     path = self.field('filePath').toPyObject()
     log.debug("Starting importing references from: %s" % path) #@UndefinedVariable

     importer = ReferenceImporterThread(str(path), self)
     self.thread = importer
     # Both ways of ending the thread lead to the same slot
     for signal_name in ("finished()", "terminated()"):
         self.connect(importer, QtCore.SIGNAL(signal_name), self.finish)
     importer.start()
コード例 #34
0
    def _update_wrapper(self):
        """
        Updates the score and the rules of the last selected wrapper.
        Nothing is done unless a wrapper is selected and it has been
        marked for update.
        """
        if not self.wrapper_for_update or not self.last_selected_wrapper:
            return
        log.debug('Updating last selected wrapper')  #@UndefinedVariable

        selected = self.last_selected_wrapper.wrapper
        self._update_score(selected)
        self._update_rules(selected)
コード例 #35
0
 def _rule_example(self, example):
     """
     Creates the rules for an example.

     A content that is exactly a 'str' or 'unicode' object is replaced,
     in the example itself, by a list with that single element. Then
     every element of the content is ruled with '_rule_example_content'
     and the truthy rules are returned.
     """
     log.debug('Ruling example with RegexRuler') #@UndefinedVariable

     if type(example.content) in (str, unicode):
         example.content = [example.content]

     found = []
     for element in example.content:
         candidate = self._rule_example_content(example.value, element)
         if not candidate:
             continue
         found.append(candidate)
     return found
コード例 #36
0
 def _format_references(self):
     """
     Starts the formatting thread with the checked items, unless the
     page is being populated. The entries edit shows 'Updating...' in
     the meantime.
     """
     if not self.populating:
         log.debug('Item checked/unchecked') #@UndefinedVariable
         self.ui.entriesEdit.setText('Updating...')

         checked = self._get_checked_items()
         log.debug('Items selected: %d' % len(checked)) #@UndefinedVariable

         self.thread.items = checked
         self.thread.start()
コード例 #37
0
 def apply(self, input):
     """
     Applies the regular expression to every string of the input.

     Returns the first group of every string that matches the pattern
     from its beginning, provided the pattern defines at least one
     group. Strings longer than MAX_ELEM_CONTENT_LEN are not examined.
     """
     log.debug('Applying MultiValueRegexRule with pattern %s' % #@UndefinedVariable
               (str(self.pattern)[:30]))
     compiled = re.compile(self.pattern)
     extracted = []
     for candidate in input:
         if len(candidate) <= MAX_ELEM_CONTENT_LEN:
             found = compiled.match(candidate)
             if found and found.groups():
                 extracted.append(found.group(1))
     return extracted
コード例 #38
0
 def _validate_reference_fields(self, reference, raw_text):
     """
     Sets the 'valid' flag of every field of the reference.

     The validator of a field is the second item of its entry in
     'field_validation'. Fields without an entry, or with a falsy
     validator, are considered valid. This method is a complement for
     _use_reference_wrappers.
     """
     log.debug('Validating reference fields') #@UndefinedVariable
     for field_name in reference.fields:
         field = reference.get_field(field_name)
         try:
             validator = self.field_validation[field_name][1]
         except KeyError:
             validator = None

         if validator:
             field.valid = validator.validate(field.value, raw_text)
         else:
             field.valid = True
コード例 #39
0
    def _validate_reference_fields(self, reference, raw_text):
        """
        Sets the 'valid' flag of every field of the reference.

        The validator of a field is the second item of its entry in
        'field_validation'. Fields without an entry, or with a falsy
        validator, are considered valid. This method is a complement for
        _use_reference_wrappers.
        """
        log.debug('Validating reference fields')  #@UndefinedVariable
        for field_name in reference.fields:
            field = reference.get_field(field_name)
            try:
                validator = self.field_validation[field_name][1]
            except KeyError:
                validator = None

            if validator:
                field.valid = validator.validate(field.value, raw_text)
            else:
                field.valid = True
コード例 #40
0
 def apply(self, input):
     """
     Converts a list of person strings to a list of names.

     Digits are removed from every string and the surrounding whitespace
     is stripped. What remains, if anything, is passed to 'split_name',
     and the truthy results are returned. An input that is not a list
     yields an empty list.
     """
     log.debug('Applying PersonRule') #@UndefinedVariable
     if not isinstance(input, list):
         return []

     names = []
     for person in input:
         # Raw string: '\d' is not a valid escape in a normal literal
         person = re.sub(r'\d', '', person).strip()
         if not person:
             continue

         name = split_name(person)
         if name:
             names.append(name)
     return names