def evaluate(task_name='language', classifiers=None):
    """Run the annotation workflow steps, then train each classifier on the reviewed samples.

    The function builds a ``Workflow`` from the database-init, dataset-chooser,
    annotation-type, keyword-filtering and review-init steps, starts it, and
    calls ``complete()`` on every step in order. It then reads ``docs`` and
    ``annos`` from ``wf.samples``, keeps the annotations whose
    ``REVIEWED_TYPE`` is not ``None``, and feeds texts/labels to each
    classifier's ``train`` method.

    Args:
        task_name: Name assigned to the workflow and passed to every
            classifier constructor.
        classifiers: Iterable of classifier classes. Each is instantiated as
            ``cls(task_name=task_name)`` and must expose ``train(x, y)``.
            When ``None`` (the default), ``[LogisticBOWClassifiers]`` is used.

    Returns:
        None. Progress and labels are printed to stdout.
    """
    # Resolve the default at call time instead of sharing one list object
    # across calls (mutable default argument pitfall).
    if classifiers is None:
        classifiers = [LogisticBOWClassifiers]

    ConfigReader()
    dbi = DBInitiater(name='db_initiator')
    # NOTE(review): `tc` is constructed but never added to the workflow below;
    # kept in case construction has side effects -- confirm and remove if not.
    tc = TaskChooser(name='tasknamer')
    dsc = DataSetChooser(
        name='dataset_chooser',
        description='<h4>Choose which dateaset you want to use: </h4>')
    # The middle fragment is double-quoted because it contains an apostrophe
    # ("won't"); the first fragment ends with a space so the implicit
    # concatenation reads "If you have" rather than "If youhave".
    anno_type = AnnotationTypeDef(
        '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you '
        "have too many types, try set up them separately, so that you won't need to choose from a long list "
        'for each sample. </p>',
        name='types')
    kf = KeywordsFiltering(name='keywords')
    ri = ReviewRBInit(name="rb_review_init")
    # The ReviewMLInit / ReviewMLLoop steps are deliberately not part of this
    # workflow (they were commented out in the original code).
    steps = [dbi, dsc, anno_type, kf, ri]
    wf = Workflow(steps)
    wf.task_name = task_name
    wf.start()

    # Complete every step programmatically, in workflow order.
    dbi.complete()
    dsc.complete()
    anno_type.complete()
    kf.complete()
    ri.complete()

    for key, value in wf.samples.items():
        print(key, len(value))

    docs = wf.samples['docs']
    annos = wf.samples['annos']
    reviewed_docs = {
        doc_id: anno.REVIEWED_TYPE
        for doc_id, anno in annos.items()
        if anno.REVIEWED_TYPE is not None
    }
    # NOTE(review): x takes the first len(reviewed_docs) documents while y
    # follows the iteration order of `annos`; texts and labels line up only if
    # the reviewed documents are exactly the leading ones in the same order --
    # confirm against how `wf.samples` is populated.
    x = [doc.TEXT for doc in docs[:len(reviewed_docs)]]
    y = list(reviewed_docs.values())
    print(y)

    # Raises the root logger to DEBUG for the training phase (process-wide).
    logging.getLogger().setLevel(logging.DEBUG)

    for cl in classifiers:
        cl_instance = cl(task_name=task_name)
        print("\n\nReport performance of {}:".format(cl.__name__))
        cl_instance.train(x, y)
from SmartAnno.utils.ReviewRBInit import ReviewRBInit
from SmartAnno.utils.ReviewRBLoop import ReviewRBLoop
from SmartAnno.utils.ReviewMLInit import ReviewMLInit
from SmartAnno.utils.ReviewMLLoop import ReviewMLLoop
from SmartAnno.models.logistic.LogisticBOWClassifiers import LogisticBOWClassifier
from SmartAnno.utils.DataSetChooser import DataSetChooser

# Script: assemble a workflow for the 'language' task from the JSON config
# and a local SQLite database, then append the steps in the order they run.

# Root logger at DEBUG so every step's log output is visible.
logging.getLogger().setLevel(logging.DEBUG)

# Load the configuration first; the workflow reads its config file path and
# the API key from ConfigReader.
ConfigReader('../conf/smartanno_conf.json')
wf = Workflow(config_file=ConfigReader.config_file)
wf.api_key = ConfigReader.getValue("api_key")
wf.dao = Dao('sqlite+pysqlite:///../data/test.sqlite', sqlalchemy_dao.POOL_DISABLED)
wf.task_name = 'language'

# The middle fragment is double-quoted because it contains an apostrophe
# ("won't"); the first fragment ends with a space so the implicit
# concatenation reads "If you have" rather than "If youhave".
wf.append(
    AnnotationTypeDef(
        '<h3>Annotation types:</h3><p>List all the types you want to identify below. Each type per line.<br/>If you '
        "have too many types, try set up them separately, so that you won't need to choose from a long list "
        'for each sample. </p>',
        name='types'))
wf.append(KeywordsFiltering(name='keywords'))
wf.append(
    DataSetChooser(
        name='dataset_chooser',
        description='<h4>Choose which dateaset you want to use: </h4>'))

# Review steps: the init step followed by the review loop, which is given the
# rule file path via `rush_rule`.
rb = ReviewRBInit(name="rb_review_init")
wf.append(rb)
rv = ReviewRBLoop(name='rb_review', rush_rule='../conf/rush_rules.tsv')
wf.append(rv)