def prepareGloveModel():
    """Load the SmartAnno configuration and build the GloVe model from it.

    The ``glove/model_path``, ``glove/vocab`` and ``glove/vector`` settings
    are read from ``../conf/smartanno_conf.json`` and handed to the
    ``GloveModel`` constructor.
    """
    ConfigReader('../conf/smartanno_conf.json')
    # Dict literals evaluate in order, so the settings are read path, vocab, vector.
    settings = {
        'word2vec_file': ConfigReader.getValue('glove/model_path'),
        'vocab': ConfigReader.getValue('glove/vocab'),
        'vect': ConfigReader.getValue('glove/vector'),
    }
    GloveModel(**settings)
    # The class attribute is read here, but its value is not used any further.
    _loaded = GloveModel.glove_model
def start(self):
    """Display the word list for this step, or skip ahead when no GloVe model is set.

    When the ``glove/model_path`` setting is missing or empty, the step two
    positions after this one in the workflow is started instead and nothing
    is displayed. Otherwise ``workflow.we_extended`` is created if absent and
    the rows returned by ``showWords`` are shown in a vertical box.
    """
    glove_path = ConfigReader.getValue("glove/model_path")
    if glove_path is None or len(glove_path) == 0:
        self.workflow.steps[self.pos_id + 2].start()
        return
    if not hasattr(self.workflow, 'we_extended'):
        self.workflow.we_extended = dict()
    rows = self.showWords(self.workflow.filters)
    # ``flex_flow`` is the ipywidgets Layout property for the flex direction;
    # the previous ``flex_grown`` keyword is not a Layout property.
    self.box = widgets.VBox(rows, layout=widgets.Layout(display='flex', flex_flow='column'))
    display(self.box)
def loadDefaultConfig(self):
    """Fill in unset UMLS options with defaults and save the configuration.

    Every option that falls back to its default is also written to the
    configuration under the matching ``umls/...`` key. The configuration is
    saved at the end whether or not any default was applied.
    """
    if self.sources is None or len(self.sources) == 0:
        self.sources = ['SNOMEDCT_US']
        ConfigReader.setValue("umls/sources", self.sources)

    # The remaining options share one rule: ``None`` means "use the default".
    fallbacks = (
        ('filter_by_length', 0),
        ('filter_by_contains', True),
        ('max_query', 50),
    )
    for option, default in fallbacks:
        if getattr(self, option) is None:
            setattr(self, option, default)
            ConfigReader.setValue("umls/" + option, default)

    ConfigReader.saveConfig()
def __init__(self, name=None):
    """Set up the dataset-naming prompt and the sentence-splitter selector.

    Args:
        name: step name forwarded to the parent initializer.
    """
    super().__init__(name)

    # Plain state, filled in later by other methods.
    self.dao = None
    self.dbpath = ''
    self.remove_old = False
    self.dataset_name = 'orig'
    self.whoosh_root = ConfigReader.getValue("whoosh/root_path")

    # Widgets.
    self.html1 = widgets.HTML('<h4>Give a name for this dataset: </h4>')
    self.dataset_name_input = None
    self.html2 = None
    splitter_options = ['TextBlob_Splitter', 'PyRuSh', 'Not_To_Split']
    splitter_tooltips = [
        'Use TextBlob sentence splitter',
        'Use PyRuSH to split sentences',
        'don\'t split',
    ]
    self.toggle = widgets.ToggleButtons(
        options=splitter_options,
        description='',
        disabled=False,
        value='Not_To_Split',
        button_style='',  # 'success', 'info', 'warning', 'danger' or ''
        tooltips=splitter_tooltips,
        layout=widgets.Layout(width='70%'),
    )
    self.data_step = None
def testRBLoop(self):
    """Drive a small rule-based review workflow against the demo SQLite file.

    Builds a workflow of ``ReviewRBInit``, ``ReviewRBLoop`` and an HTML step,
    seeds it with two annotation types and their filters, starts it, and then
    navigates through the second branch button of the init step.
    """
    logging.getLogger().setLevel(logging.WARN)
    ConfigReader()
    wf = Workflow()
    rb = ReviewRBInit(name="rb_review_init")
    wf.append(rb)
    rv = ReviewRBLoop(name='rb_review')
    wf.append(rv)
    # Double quotes: the text contains an apostrophe ("let's"), which ended
    # the previous single-quoted literal early.
    wf.append(
        PreviousNextHTML(
            "<h2>Welcome to SmartAnno!</h2><h4>First, let's import txt data from a directory. </h4>",
            name='intro'))
    wf.filters = {'TypeA': ['heart'], 'TypeB': ['exam']}
    wf.types = ['TypeA', 'TypeB']
    wf.task_id = 1
    wf.umls_extended = {}
    wf.we_extended = {}
    wf.dao = Dao('sqlite+pysqlite:///data/demo.sqlite', sqlalchemy_dao.POOL_DISABLED)
    wf.start()
    if len(rb.branch_buttons) == 0:
        # if no records in the db, the optional buttons won't show
        rb.sample_size_input.value = 3
        rb.complete()
        wf.start()
    print([doc.DOC_ID for doc in rb.data['docs']])
    print([anno.REVIEWED_TYPE for anno in wf.steps[0].data['annos'].values()])
    rb.sample_size_input.value = 1
    rb.navigate(rb.branch_buttons[1])
def __init__(self, description='', name=str(Step.global_id + 1), ml_classifier_cls=LogisticBOWClassifier):
    """Initialise the state of the machine-learning review step.

    Args:
        description: accepted but not used in this initializer.
        name: step name forwarded to the parent initializer.
        ml_classifier_cls: classifier class stored on the instance.
    """
    # NOTE(review): the default for ``name`` is computed once, when the
    # function is defined, not on each call -- confirm this is intended.

    # Everything that starts out unset.
    self.sample_size_input = self.percent_slider = None
    self.box = self.data = self.docs = self.annos = None
    self.reviewed_docs = self.reviewed_pos = self.leftover = None

    self.samples = {"contain": [], "notcontain": []}
    self.ready = False
    # reset, continue, addmore,
    self.move_next_option = ''
    self.previousReviewed = OrderedDict()
    self.learning_pace = ConfigReader.getValue('review/ml_learning_pace')
    self.un_reviewed = 0
    self.parameters = {}
    self.parameter_inputs = {}
    self.ml_classifier_cls = ml_classifier_cls
    super().__init__(name=name)
def __init__(self, targets=None, modifiers=None, feature_inference_rule=None, document_inference_rule=None,
             rush_rule=None, expected_values=[], save_markups=True):
    """Build the rule-based document classifier and register it as the class-level instance.

    Args:
        targets: target rules, as a file name / rule string or as another object.
        modifiers: modifier rules, as a file name or as another object.
        feature_inference_rule: passed to ``FeatureInferencer``.
        document_inference_rule: passed to ``DocumentInferencer``.
        rush_rule: path to a RuSH rule file. When it is ``None`` or not an
            existing file, the ``rush_rules_path`` setting is used instead.
        expected_values: values stored lower-cased in ``self.expected_values``.
            The default list is only iterated, never mutated.
        save_markups: stored on the instance.
    """
    self.document_inferencer = DocumentInferencer(document_inference_rule)
    self.feature_inferencer = FeatureInferencer(feature_inference_rule)
    self.conclusions = []
    self.modifiers = modifiers
    self.targets = targets
    self.save_markups = save_markups
    self.expected_values = [value.lower() for value in expected_values]
    self.saved_markups_map = dict()
    self.pyrush = None
    if rush_rule is None or not os.path.isfile(rush_rule):
        rush_rule = ConfigReader.getValue('rush_rules_path')
    if rush_rule is not None and os.path.isfile(rush_rule):
        self.pyrush = RuSH(rush_rule)
    else:
        # The configured path may itself be None; os.path.abspath(None)
        # raises TypeError, so only resolve a real path.
        missing = os.path.abspath(rush_rule) if rush_rule is not None else None
        logMsg(("File not found", missing))
    self.last_doc_name = ''
    if modifiers is not None and targets is not None:
        if isinstance(modifiers, str) and isinstance(targets, str):
            rule_suffixes = ('.csv', '.tsv', '.txt', '.yml')
            modifiers_look_like_file = modifiers.endswith(rule_suffixes)
            targets_look_like_rules = targets.endswith(rule_suffixes) or targets.startswith('Lex\t')
            if modifiers_look_like_file and targets_look_like_rules:
                self.setModifiersTargetsFromFiles(modifiers, targets)
        else:
            # NOTE(review): this branch is taken when the two arguments are
            # not both strings -- confirm the nesting against the original file.
            self.setModifiersTargets(modifiers, targets)
    RBDocumentClassifier.instance = self
def __init__(self, apikey=None):
    """Store the API key and the service URL.

    Args:
        apikey: key to use. When ``None``, the ``api_key`` setting is read
            from the configuration instead.
    """
    self.apikey = ConfigReader.getValue('api_key') if apikey is None else apikey
    self.service = "http://umlsks.nlm.nih.gov"
def start(self):
    """Open the database for the workflow, or move on if one is already attached.

    When the workflow already has a non-``None`` ``dao``, the next step is
    started directly. Otherwise the database path and connection string are
    read from the workflow's config file and the DAO is initialised. If the
    database file already exists, the overwrite/import options are displayed.
    If it does not, the tables are created, ``need_import`` is set and the
    next step is started.
    """
    if getattr(self.workflow, 'dao', None) is not None:
        self.next_step.start()
        return

    config_file = self.workflow.config_file
    print(config_file)
    self.dbpath = ConfigReader(config_file).getValue('db_path')
    self.db_config = ConfigReader(config_file).getValue('db_header') + self.dbpath

    db_exists = os.path.isfile(self.dbpath)
    # Both cases open the DAO first; they differ only in what follows.
    self.initDao(self.db_config)
    if db_exists:
        self.displayOptions()
    else:
        self.createSQLTables()
        self.need_import = True
        self.next_step.start()
def saveGloveConfig(self):
    """Copy the GloVe settings from the input widgets and persist them.

    The widget values are stored on the instance, the path is mirrored onto
    the workflow, and the three ``glove/...`` settings are written and saved.
    The vocabulary and vector values are converted with ``int`` before
    being written.
    """
    for field in ('glove_path', 'glove_vocab', 'glove_vector'):
        setattr(self, field, getattr(self, field + '_input').value)
    self.workflow.glove_path = self.glove_path

    numeric_settings = (("glove/vocab", 'glove_vocab'), ("glove/vector", 'glove_vector'))
    for key, field in numeric_settings:
        ConfigReader.setValue(key, int(getattr(self, field)))
    ConfigReader.setValue("glove/model_path", self.glove_path)
    ConfigReader.saveConfig()
def __init__(self, description='', name=None):
    """Read the GloVe settings and prepare the (still empty) input widgets.

    Args:
        description: HTML shown by ``self.html``.
        name: step name forwarded to the parent initializer.
    """
    self.glove_path = ConfigReader.getValue('glove/model_path')
    configured_vocab = ConfigReader.getValue('glove/vocab')
    configured_vector = ConfigReader.getValue('glove/vector')
    # Fall back to fixed defaults when the configuration has no value.
    self.glove_vocab = 1900000 if configured_vocab is None else configured_vocab
    self.glove_vector = 300 if configured_vector is None else configured_vector

    # widgets to take the user inputs
    self.glove_path_input = self.glove_vocab_input = None
    self.glove_vector_input = self.api_key_input = None

    self.html = widgets.HTML(value=description)
    super().__init__(name)
def complete(self):
    """Collect the selected words and decide which step comes next.

    Each toggle's value is stored as a ``TreeSet`` in ``self.to_ext_words``.
    If at least one set is non-empty, the selection is published on the
    workflow and, when no API key was known yet, the key from the input
    widget is stored and saved to the configuration. If nothing was selected,
    the step two positions ahead is linked as the next step. The parent
    ``complete`` runs in both cases.
    """
    any_selected = False
    for type_name, toggle in self.to_umls_ext_filters.items():
        chosen = TreeSet(toggle.value)
        self.to_ext_words[type_name] = chosen
        if len(chosen) > 0:
            any_selected = True

    if any_selected:
        self.workflow.to_ext_words = self.to_ext_words
        if self.api_key is None:
            self.api_key = self.api_input.value
            self.workflow.api_key = self.api_key
            ConfigReader.setValue("api_key", self.api_key)
            ConfigReader.saveConfig()
    else:
        self.setNextStep(self.workflow.steps[self.pos_id + 2])
        self.workflow.steps[self.pos_id + 2].setPreviousStep(self)
    super().complete()
def __init__(
        self,
        description='<h4>Extend keywords through <b>UMLS</b></h4><p>Please select which keywords you want to '
                    'check the synonyms from UMLS:',
        name=None):
    """Prepare the UMLS-extension setup step.

    Args:
        description: HTML shown as the step title.
        name: step name forwarded to the parent initializer.
    """
    self.api_key = ConfigReader.getValue('api_key')
    self.title = widgets.HTML(value=description)
    # Both mappings start empty and are filled in by other methods.
    self.to_ext_words = {}
    self.to_umls_ext_filters = {}
    self.api_input = None
    super().__init__(name)
def __init__(self, name=str(Step.global_id + 1), **kwargs):
    """Initialise the rule-based review loop.

    Args:
        name: step name forwarded to the parent initializer.
        **kwargs: only ``rush_rule`` is read here. When it is absent, the
            ``rush_rules_path`` setting is used instead.
    """
    super().__init__([], name=name)
    self.docs = []
    self.data = dict()
    self.annos = dict()
    self.reviewed_docs = dict()
    self.threshold = ConfigReader.getValue('review/rb_model_threshold')
    self.nlp = None
    # Every JavaScript statement ends with an explicit semicolon, so the
    # snippet stays valid however its whitespace is folded.
    self.js = '''<script>
function setFocusToTextBox(){
    var spans = document.getElementsByClassName("highlighter");
    var id=document.getElementById('d1').pos;
    if (id===undefined){
        id=0;
    }
    if (id>=spans.length){
        id=0;
    }
    var topPos = spans[id].offsetTop;
    dv=document.getElementById('d1');
    dv.scrollTop = topPos-20;
    dv.pos=id+1;
}
</script>'''
    self.end_js = '''<script>document.getElementById('d1').pos=0;topPos=0;</script>'''
    self.matcher = None
    self.metaColumns = ConfigReader().getValue("review/meta_columns")
    self.div_height = ConfigReader().getValue("review/div_height")
    logMsg(('self.div_height:', self.div_height))
    self.show_meta_name = ConfigReader().getValue("review/show_meta_name")
    self.hightligh_span_tag = ' <span class="highlighter" style="background-color: %s ">' % ConfigReader(
    ).getValue("review/highlight_color")
    # Read the config setting only when no rule was passed in.
    if 'rush_rule' in kwargs:
        self.rush_rule = kwargs['rush_rule']
    else:
        self.rush_rule = ConfigReader.getValue('rush_rules_path')
def requestUMLSAPIKey(self, rows):
    """Append a prompt and text field for the UMLS API key when none is configured.

    Nothing is added when the ``api_key`` setting already holds a non-empty
    value.

    Args:
        rows: list of widgets; extended in place.
    """
    api_key = ConfigReader.getValue("api_key")
    if api_key is None or len(api_key) == 0:
        # The prompt previously showed the GloVe-model text although this
        # field collects the UMLS API key.
        rows.append(
            widgets.HTML(
                value='<h4>Set your API Key</h4><p>In order to use the UMLS synonym checking module, you need to set'
                      ' up your API key: (<a href="https://www.nlm.nih.gov/research/umls/user_education/quick_tours/'
                      'UTS-API/UTS_REST_API_Authentication.html" target="_blank">How to get your API Key_at 01:12 from'
                      ' beginning. </a>)</p><p>If you do not set the api key, the UMLS synonym extender will be '
                      '<b>skipped</b>.</p>'))
        self.api_key_input = widgets.Text(value='', placeholder='', description='', disabled=False)
        rows.append(self.api_key_input)
        rows += self.addSeparator()
def requestUMLSAPIKey(self, rows):
    """Ask for the UMLS API key if the configuration does not provide one.

    When the ``api_key`` setting is present and non-empty, ``rows`` is left
    untouched. Otherwise an explanatory HTML widget, a text input (kept in
    ``self.api_key_input``) and a separator are added to it.

    Args:
        rows: list of widgets; extended in place.
    """
    configured_key = ConfigReader.getValue("api_key")
    if configured_key is not None and len(configured_key) != 0:
        return

    prompt = widgets.HTML(
        value='<h4>Set your API Key</h4><p>In order to use the UMLS synonym checking module, you need to set'
              ' up your API key: (<a href="https://www.nlm.nih.gov/research/umls/user_education/quick_tours/'
              'UTS-API/UTS_REST_API_Authentication.html" target="_blank">How to get your API Key_at 01:12 from'
              ' beginning. </a>)</p><p>If you do not set the api key, the UMLS synonym extender will be '
              '<b>skipped</b>.</p>')
    rows.append(prompt)
    self.api_key_input = widgets.Text(value='', placeholder='', description='', disabled=False)
    rows.append(self.api_key_input)
    rows += self.addSeparator()
def __init__(self, **kwargs):
    """Initialise the sampler state from keyword arguments.

    Args:
        **kwargs: ``previous_sampled_ids`` and ``dao`` are required;
            ``dataset_id`` is optional and defaults to ``'origin_doc'``.

    Raises:
        KeyError: if ``previous_sampled_ids`` or ``dao`` is missing.
    """
    self.sample_size = 0
    self.previous_sampled_ids = kwargs['previous_sampled_ids']
    self.dao = kwargs['dao']
    self.dataset_id = kwargs.get('dataset_id', 'origin_doc')
    self.ignore_case = True
    self.whoosh_root = ConfigReader.getValue('whoosh/root_path')

    # Bookkeeping containers and counters, all starting empty / at zero.
    self.grouped_ids = {}
    self.all_contain_ids = set()
    self.available_not_contain = 0
    self.new_available_not_contain = 0
    self.new_ids = {}
    self.current_stats = {}
def __init__(self, description='', name=str(Step.global_id + 1), sampler_cls: type = KeywordStratefiedSampler):
    """Create the sampling controls and initial state of the rule-based review setup step.

    Args:
        description: accepted but not used in this initializer.
        name: step name forwarded to the parent initializer.
        sampler_cls: sampler class stored on the instance.
    """
    super().__init__(name=name)
    self.toggle = widgets.ToggleButtons(
        options=sample_options,
        value=sample_options[-1],
        description='What to do with previously sampled data? ',
        style=dict(description_width='initial'),
        button_style='info')
    self.toggle.observe(self.onPreviousSampleHandleChange)
    self.sample_size_input = widgets.BoundedIntText(
        value=0, min=0, max=0, step=1,
        description='Total documents you want to sample:',
        style=dict(description_width='initial'))
    self.sample_size_input.observe(self.onSampleConfigChange)
    self.sampler_cls = sampler_cls
    self.sampled_summary = widgets.HTML(value='')
    self.percent_slider = widgets.IntSlider(
        value=70, min=0, max=100, step=5,
        description='', disabled=False, continuous_update=False,
        orientation='horizontal', readout=True, readout_format='d')
    self.percent_slider.observe(self.onSampleConfigChange)
    self.box = None
    self.data = {'docs': [], 'annos': OrderedDict()}
    self.ready = False
    # reset, continue, addmore,
    self.move_next_option = ''
    self.total = None
    self.total_contains = None
    self.un_reviewed = 0
    self.sampler = None
    # ``samples`` used to be assigned twice; only the later, empty dict ever
    # took effect, so the dead first assignment is dropped.
    self.samples = dict()
    self.current_stats = dict()
    self.max_threshold = ConfigReader.getValue("review/rb_model_threshold")
    self.sample_sizes = dict()
def navigate(self, button):
    """Persist pending settings, then start (and optionally complete) workflow step 1.

    The GloVe settings are saved when their input widget exists. The API key
    is saved when its input widget exists, otherwise it is copied from the
    configuration onto the workflow. A button labelled ``'ContinueReviewing'``
    sets ``workflow.to_continue`` to ``True`` and completes step 1 right
    after starting it; any other button sets it to ``False`` and only starts
    the step.

    Args:
        button: the clicked button; only its ``description`` is read.
    """
    if self.glove_path_input is not None:
        self.saveGloveConfig()
    if self.api_key_input is None:
        self.workflow.api_key = ConfigReader.getValue("api_key")
    else:
        self.saveAPIKey()
    self.backgroundWork()

    resume = button.description == 'ContinueReviewing'
    self.workflow.to_continue = resume
    self.workflow.steps[1].start()
    if resume:
        self.workflow.steps[1].complete()
def evaluate(task_name='language', classifiers=[LogisticBOWClassifiers]):
    """Run the annotation workflow non-interactively and train each classifier on the reviewed samples.

    Args:
        task_name: assigned to the workflow and passed to each classifier.
        classifiers: classifier classes to instantiate and train. The default
            list is only iterated, never mutated.
    """
    ConfigReader()
    dbi = DBInitiater(name='db_initiator')
    tc = TaskChooser(name='tasknamer')
    dsc = DataSetChooser(
        name='dataset_chooser',
        description='<h4>Choose which dateaset you want to use: </h4>')
    # The middle literal is double-quoted because it contains an apostrophe
    # ("won't"); the first literal ends with a space so "you" and "have" are
    # not run together.
    anno_type = AnnotationTypeDef(
        '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you '
        "have too many types, try set up them separately, so that you won't need to choose from a long list "
        'for each sample. </p>',
        name='types')
    kf = KeywordsFiltering(name='keywords')
    ri = ReviewRBInit(name="rb_review_init")
    # mi=ReviewMLInit(name='ml_review_init')
    # ml=ReviewMLLoop(name='ml_review', ml_classifier_cls=SVMBOWClassifier)
    wf = Workflow([
        dbi, dsc, anno_type, kf, ri
        # ,mi,ml
    ])
    wf.task_name = task_name
    wf.start()
    # Complete every step in order without user interaction.
    dbi.complete()
    dsc.complete()
    anno_type.complete()
    kf.complete()
    ri.complete()
    for key, value in wf.samples.items():
        print(key, len(value))
    docs = wf.samples['docs']
    annos = wf.samples['annos']
    reviewed_docs = {
        doc_id: anno.REVIEWED_TYPE
        for doc_id, anno in annos.items() if anno.REVIEWED_TYPE is not None
    }
    # Texts are taken from the first len(reviewed_docs) documents.
    x = [doc.TEXT for doc in docs[:len(reviewed_docs)]]
    y = list(reviewed_docs.values())
    print(y)
    logging.getLogger().setLevel(logging.DEBUG)
    for cl in classifiers:
        cl_instance = cl(task_name=task_name)
        print("\n\nReport performance of {}:".format(cl.__name__))
        cl_instance.train(x, y)
def start(self):
    """Assemble the full SmartAnno workflow and start it.

    The workflow runs from the intro and database setup, through keyword
    definition and extension (UMLS and word embedding), to the rule-based
    and machine-learning review loops.
    """
    cr = ConfigReader()
    self.workflow = Workflow([
        IntroStep(
            '<h2>Welcome to SmartAnno!</h2><h4>Do you want to start from beginning or continue previous reviewing? </h4>',
            name='intro'),
        DBInitiater(name='db_initiator'),
        DirChooser(name='choosedir'),
        ReadFiles(name='readfiles'),
        DocsToDB(name='save2db'),
        TaskChooser(name='tasknamer'),
        DataSetChooser(
            name='dataset_chooser',
            description='<h4>Choose which dateaset you want to use: </h4>'),
        # The middle literal is double-quoted because it contains an
        # apostrophe ("won't"); the first literal ends with a space so "you"
        # and "have" are not run together.
        AnnotationTypeDef(
            '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you '
            "have too many types, try set up them separately, so that you won't need to choose from a long list "
            'for each sample. </p>',
            name='types'),
        KeywordsFiltering(name='keywords'),
        # PreviousNextIntSlider(value=60, min=0, max=100, step=10,
        #                       description='<h4>Percentage to Filter: </h4><p>Choose how many percent of the samples '
        #                                   'you want to use the keywords filter.</p>', name='percent2filter'),
        KeywordsUMLSExtenderSetup(name='umls_extender_setup'),
        KeywordsUMLSExtender(
            name='umls_extender',
            sources=cr.getValue("umls/sources"),
            filter_by_length=cr.getValue("umls/filter_by_length"),
            filter_by_contains=cr.getValue("umls/filter_by_contains"),
            max_query=cr.getValue("umls/max_query")),
        KeywordsEmbeddingExtenderSetup(name='w_e_extender_setup'),
        KeywordsEmbeddingExtender(name='w_e_extender', max_query=40),
        ReviewRBInit(name="rb_review_init"),
        ReviewRBLoop(name='rb_review'),
        PreviousNextHTML(
            description='<h2>Congratuations!</h2><h4>You have finished the initial review '
                        'on the rule-base preannotations. </h4>',
            name='rb_review_done'),
        ReviewMLInit(name='ml_review_init'),
        ReviewMLLoop(name='ml_review', ml_classifier_cls=self.ml_classifier_cls),
        PreviousNextHTML(
            name='finish',
            description='<h3>Well done!</h3><h4>Now you have finished reviewing all the samples. ')
    ])
    self.workflow.start(False)
def init_real_time(self):
    """Prepare the classifier and data, then build one review step per document to show.

    Creates the classifier, refreshes the learning pace from the
    configuration, reads the data, and starts background training when the
    classifier class reports ``NotTrained``. If there are documents and the
    loop workflow has no steps yet, a ``ReviewML`` step is appended for each
    document up to ``last_doc_pos``.
    """
    self.ml_classifier = self.ml_classifier_cls(
        task_name=self.workflow.task_name)
    self.learning_pace = ConfigReader.getValue("review/ml_learning_pace")
    self.loop_workflow.filters = self.workflow.filters
    self.readData()
    # The training status is read from the classifier class, not the instance.
    if self.ml_classifier_cls.status == NotTrained:
        self.backgroundTraining()
    # Reuse the class-level nlp and matcher attributes of ReviewRBInit.
    self.nlp = ReviewRBInit.nlp
    self.matcher = ReviewRBInit.matcher
    logMsg([doc.DOC_ID for doc in self.docs])
    if self.docs is not None and len(
            self.docs) > 0 and (self.loop_workflow is None
                                or len(self.loop_workflow.steps) == 0):
        # Cover every reviewed document plus one more, unless all documents
        # have already been reviewed.
        last_doc_pos = len(self.reviewed_docs) + 1 if len(
            self.reviewed_docs) < len(self.docs) else len(
                self.reviewed_docs)
        for i in range(0, last_doc_pos):
            doc = self.docs[i]
            content = self.genContent(doc)
            reviewed = False
            # Prefer the stored reviewed type; otherwise ask getPrediction.
            if doc.DOC_ID in self.annos and self.annos[
                    doc.DOC_ID].REVIEWED_TYPE is not None:
                prediction = self.annos[doc.DOC_ID].REVIEWED_TYPE
                reviewed = True
            else:
                prediction = self.getPrediction(doc)
            repeat_step = ReviewML(
                description=content,
                options=self.workflow.types,
                value=prediction,
                js=self.js,
                master=self,
                reviewed=reviewed,
                button_style=('success' if reviewed else 'info'))
            self.appendRepeatStep(repeat_step)
    pass
from SmartAnno.utils.ConfigReader import ConfigReader from SmartAnno.gui.Workflow import Workflow from SmartAnno.utils.AnnotationTypeDef import AnnotationTypeDef from SmartAnno.utils.KeywordsFiltering import KeywordsFiltering from SmartAnno.gui.PreviousNextWidgets import PreviousNextHTML from SmartAnno.utils.ReviewRBInit import ReviewRBInit from SmartAnno.utils.ReviewRBLoop import ReviewRBLoop from SmartAnno.utils.ReviewMLInit import ReviewMLInit from SmartAnno.utils.ReviewMLLoop import ReviewMLLoop from SmartAnno.models.logistic.LogisticBOWClassifiers import LogisticBOWClassifier from SmartAnno.utils.DataSetChooser import DataSetChooser logging.getLogger().setLevel(logging.DEBUG) ConfigReader('../conf/smartanno_conf.json') wf = Workflow(config_file=ConfigReader.config_file) wf.api_key = ConfigReader.getValue("api_key") wf.dao = Dao('sqlite+pysqlite:///../data/test.sqlite', sqlalchemy_dao.POOL_DISABLED) wf.task_name = 'language' wf.append( AnnotationTypeDef( '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you' 'have too many types, try set up them separately, so that you won't need to choose from a long list ' 'for each sample. </p>', name='types')) wf.append(KeywordsFiltering(name='keywords')) wf.append( DataSetChooser(
def restoreStatus(self):
    """Return the stored status for this object, or ``0`` when none is stored.

    The value is read from the configuration key ``status/<name>``. A value
    of ``None`` or an empty string counts as "no status".
    """
    stored = ConfigReader.getValue('status/' + self.name)
    return 0 if stored is None or stored == '' else stored
def updateStatus(self, status=None):
    """Optionally replace the current status, then save it to the configuration.

    Args:
        status: new status. ``None`` keeps ``self.status`` unchanged.
    """
    self.status = self.status if status is None else status
    status_key = 'status/' + self.name
    ConfigReader.saveStatus(self.status, status_key=status_key)
from SmartAnno.utils.ConfigReader import ConfigReader
from SmartAnno.umls.UMLSFinder import UMLSFinder

# Load the configuration, then build a UMLS finder with the configured API key.
ConfigReader()
umls = UMLSFinder(
    ConfigReader.getValue("api_key"),
    sources=[],
    filter_by_length=5,
    max_query=50,
    filter_by_contains=True,
)

# Print whatever the finder returns for a sample term.
print(umls.search("ketoacidosis"))
from SmartAnno.utils.ConfigReader import ConfigReader

# Alternative kept for reference:
# ConfigReader(config_file='../conf/smartanno_conf2.json').saveStatus(0)

# Open the backup copy of the configuration file.
cr = ConfigReader(
    config_file='conf/smartanno_conf.json.bk',
)
from SmartAnno.utils.ConfigReader import ConfigReader
from SmartAnno.gui.PreviousNextWidgets import PreviousNextHTML
from SmartAnno.gui.Workflow import Workflow
from SmartAnno.utils.IntroStep import IntroStep

# Two-step demo workflow: an intro step followed by a closing message.
ConfigReader('../conf/smartanno_conf2.json')
# Double quotes: the text contains an apostrophe ("let's"), which ended the
# previous single-quoted literal early.
intro = IntroStep(
    "<h2>Welcome to SmartAnno!</h2><h4>First, let's import txt data from a directory. </h4>",
    name='intro')
wf = Workflow([
    intro,
    PreviousNextHTML(
        name='finish',
        description='<h3>Well done!</h3><h4>Now you have finished reviewing all the samples. ')
])
wf.start()
# Simulate a click on the first branch button of the intro step.
intro.navigate(intro.branch_buttons[0])
class DBInitiater(PreviousNext):
    """Workflow step that opens (or creates) the SQLite database used by the workflow.

    When the database file already exists, the user is asked whether to
    overwrite it and whether to import new data. Otherwise the tables are
    created and the workflow moves straight on.
    """

    def __init__(self, name=None):
        """Initialise the step with no database attached.

        Args:
            name: step name forwarded to the parent initializer.
        """
        super().__init__(name)
        self.dao = None
        self.db_config = ''
        self.dbpath = ''
        self.need_import = False
        self.overwrite = False

    def start(self):
        """Attach a DAO to the workflow, or continue if it already has one.

        The database path and connection string are read from the workflow's
        config file. An existing database file leads to the options view. A
        missing one is created, ``need_import`` is set, and the next step is
        started.
        """
        if not hasattr(self.workflow, 'dao') or self.workflow.dao is None:
            print(self.workflow.config_file)
            self.dbpath = ConfigReader(self.workflow.config_file).getValue('db_path')
            self.db_config = ConfigReader(self.workflow.config_file).getValue('db_header') + self.dbpath
            if os.path.isfile(self.dbpath):
                self.initDao(self.db_config)
                self.displayOptions()
            else:
                self.initDao(self.db_config)
                self.createSQLTables()
                self.need_import = True
                self.next_step.start()
        else:
            self.next_step.start()

    def backStart(self):
        """Drop the workflow's DAO and run ``start`` again."""
        self.workflow.dao = None
        self.start()

    def updateBox(self):
        """Return a vertical box holding both questions, their toggles and the navigation row."""
        rows = [self.html, self.toggle] + self.addSeparator(top='10px') + \
               [self.html2, self.toggle2] + self.addSeparator(top='10px') + [
                   self.addPreviousNext(self.show_previous, self.show_next)]
        vbox = widgets.VBox(rows)
        # ``flex_flow`` is the ipywidgets Layout property for the flex
        # direction; the previous ``flex_grown`` is not a Layout property.
        vbox.layout.flex_flow = 'column'
        return vbox

    def initDao(self, dbfile):
        """Create the DAO from ``self.db_config`` and publish it on the workflow.

        Args:
            dbfile: not used by this method; the connection string is always
                taken from ``self.db_config``.
        """
        self.dao = Dao(self.db_config, sqlalchemy_dao.POOL_DISABLED)
        self.workflow.dao = self.dao
        # The workflow's dbpath is whatever follows ':///' in the connection string.
        self.workflow.dbpath = self.db_config[self.db_config.find(':///') + 4:]

    def displayOptions(self):
        """Show the overwrite and import questions for an existing database file."""
        clear_output()
        self.html = widgets.HTML(
            '<h4>Sqlite database "%s" exists, do you want to overwrite?</h4>'
            '<h4>choose <b>yes</b> will <span style="color:red"><b>clear all the data</b></span> in that database file</h4>' % self.dbpath)
        self.toggle = widgets.ToggleButtons(
            options=['Yes', 'No'],
            description='',
            disabled=False,
            value='No',
            button_style='',  # 'success', 'info', 'warning', 'danger' or ''
            tooltips=['Replace the old database', 'Append data to the old database'],
            layout=widgets.Layout(width='70%')
            # icons=['check'] * 3
        )
        self.toggle.observe(self.on_click, 'value')
        self.html2 = widgets.HTML(
            '<h4>Do you want to import new data?</h4>')
        self.toggle2 = widgets.ToggleButtons(
            options=['Yes', 'No'],
            description='',
            disabled=False,
            value='No',
            button_style='',  # 'success', 'info', 'warning', 'danger' or ''
            tooltips=['Add new data to db', 'Use existing data in db'],
            layout=widgets.Layout(width='70%')
        )
        # pad the descriptions list if it is shorter than options list
        self.resetParameters()
        self.box = self.updateBox()
        display(self.box)

    def on_click(self, change):
        """Remember the overwrite choice; choosing 'Yes' also switches the import toggle to 'Yes'.

        Args:
            change: change notification; its ``'new'`` entry is the selected value.
        """
        self.data = change['new']
        if self.data == 'Yes':
            self.toggle2.value = 'Yes'

    def complete(self):
        """Apply the user's choices, publish the DAO and start the next step.

        Choosing to overwrite deletes the database file, recreates the tables
        and sets both ``overwrite`` and ``need_import``. Otherwise the
        existing database is reopened and ``need_import`` is set only when
        the import toggle is 'Yes'.

        Raises:
            TypeError: if ``next_step`` is set but is not a ``Step``.
        """
        clear_output(True)
        if self.toggle.value == 'Yes':
            os.remove(self.dbpath)
            self.dao = Dao(self.db_config, sqlalchemy_dao.POOL_DISABLED)
            self.createSQLTables()
            self.overwrite = True
            self.need_import = True
        else:
            self.dao = Dao(self.db_config, sqlalchemy_dao.POOL_DISABLED)
            if self.toggle2.value == 'Yes':
                self.need_import = True
            self.overwrite = False
        self.workflow.dao = self.dao
        if self.next_step is not None:
            if isinstance(self.next_step, Step):
                if self.workflow is not None:
                    self.workflow.updateStatus(self.next_step.pos_id)
                self.next_step.start()
            else:
                raise TypeError(
                    'Type error for ' + self.name + '\'s next_step. Only Step can be the next_step, where its next_step is ' + str(
                        type(self.next_step)))
        else:
            print("next step hasn't been set.")

    def createSQLTables(self):
        """Create all tables declared on ``Model.metadata`` using the DAO's engine."""
        Model.metadata.create_all(bind=self.dao._engine)
import sqlalchemy_dao from sqlalchemy_dao import Dao from SmartAnno.utils.ConfigReader import ConfigReader from SmartAnno.db.ORMs import Filter from SmartAnno.gui.Workflow import Workflow from SmartAnno.utils.AnnotationTypeDef import AnnotationTypeDef from SmartAnno.utils.IntroStep import IntroStep from SmartAnno.utils.KeywordsFiltering import KeywordsFiltering from SmartAnno.utils.KeywordsUMLSExtender import KeywordsUMLSExtender from SmartAnno.utils.KeywordsUMLSExtenderSetup import KeywordsUMLSExtenderSetup logging.getLogger().setLevel(logging.DEBUG) ConfigReader('../conf/smartanno_conf.json') wf = Workflow(config_file=ConfigReader.config_file) wf.api_key = ConfigReader.getValue("api_key") wf.dao = Dao('sqlite+pysqlite:///../data/test.sqlite', sqlalchemy_dao.POOL_DISABLED) wf.task_name = 'language' wf.append(AnnotationTypeDef( '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you' 'have too many types, try set up them separately, so that you won't need to choose from a long list ' 'for each sample. </p>', name='types')) wf.append(KeywordsFiltering( name='keywords')) wf.append(KeywordsUMLSExtenderSetup(name='umls_extender_setup')) wf.append(KeywordsUMLSExtender(name='umls_extender', sources=ConfigReader.getValue("umls/sources"), filter_by_length=ConfigReader.getValue("umls/filter_by_length"), filter_by_contains=ConfigReader.getValue("umls/filter_by_contains"),