def replace(self, pos: int, new_step: Step):
    """Replace the step at index ``pos`` with ``new_step``.

    The new step takes over the position id of the old one, is attached to
    this workflow, is linked to both neighbours (when they exist), and the
    name lookup table is updated.

    Args:
        pos: Index of the step to replace. If it is at or beyond the current
            number of steps, ``new_step`` is appended instead. Negative
            values are ignored (a message is logged).
        new_step: The step to put at ``pos``.
    """
    if pos >= len(self):
        logMsg(
            self,
            'The position to replace is greater than the current length of steps. Append to the end instead.'
        )
        self.append(new_step)
        return
    if pos < 0:
        logMsg(
            self,
            'The position to replace is negative. Ignore this command.'
        )
        return

    old_name = self.steps[pos].name
    self.steps[pos] = new_step
    new_step.pos_id = pos
    new_step.setWorkflow(self)

    # Re-link the neighbours on both sides to the new step.
    if pos > 0:
        before = self.steps[pos - 1]
        before.setNextStep(new_step)
        new_step.setPreviousStep(before)
    if pos < len(self) - 1:
        after = self.steps[pos + 1]
        new_step.setNextStep(after)
        after.setPreviousStep(new_step)

    # pop() rather than del: a missing entry must not abort after the step
    # list has already been modified.
    self.name_dict.pop(old_name, None)
    self.name_dict[new_step.name] = pos
def remove(self, pos: int):
    """Remove the step at index ``pos`` and close the gap it leaves.

    Steps after the removed one are renumbered (``pos_id`` and
    ``name_dict``), and the two steps that become adjacent are linked to
    each other.

    Args:
        pos: Index of the step to remove. Out-of-range or negative values
            are ignored (a message is logged).
    """
    if pos >= len(self):
        logMsg(
            self,
            'The position to delete is greater than the current length of steps. Ignore this command.'
        )
        return
    if pos < 0:
        logMsg(
            self,
            'The position to delete is negative. Ignore this command.'
        )
        return

    removed = self.steps[pos]
    del self.steps[pos]

    # Drop the stale lookup entry, but only if it really points at the
    # removed slot (another step may share the same name).
    if self.name_dict.get(removed.name) == pos:
        del self.name_dict[removed.name]

    # Every step that slid down by one needs its index refreshed.
    for step_id in range(pos, len(self)):
        step = self.steps[step_id]
        step.pos_id = step_id
        self.name_dict[step.name] = step_id

    if 0 < pos < len(self):
        # A middle step was removed: join its former neighbours. Links of
        # the other steps are unaffected.
        before = self.steps[pos - 1]
        after = self.steps[pos]
        before.setNextStep(after)
        after.setPreviousStep(before)
    elif pos > 0:
        # The tail was removed: the new last step must not point at it.
        # NOTE(review): assigns the ``next_step`` attribute directly because
        # it is unclear whether setNextStep accepts None -- confirm.
        self.steps[-1].next_step = None
    # NOTE(review): when the head is removed, the new head's previous-step
    # link is left untouched; the Step API for clearing it is not visible
    # here -- confirm and clear if needed.
def load(self, config_file):
    """Load the JSON configuration into ``ConfigReader.configurations``.

    If ``config_file`` is not an existing file, it is re-resolved relative
    to a root directory: the part of the current working directory up to and
    including the last ``SmartAnno`` path component, or the working
    directory itself when no such component exists. If the file is still
    missing, it is created from the bundled default resource
    (``smartanno_conf.json.bk``).

    After loading, ``ConfigReader.config_file`` records the resolved path
    and ``setUpDirs()`` / ``dumpDefaultRules()`` are invoked.

    Args:
        config_file: Path of the configuration file, absolute or relative.
    """
    if not os.path.isfile(config_file):
        current_dir = os.getcwd()
        logMsg('current_dir=' + current_dir)
        marker = os.sep + 'SmartAnno' + os.sep
        root_pos = current_dir.rfind(marker)
        # rfind returns -1 when the marker is absent; index 0 is a valid hit.
        root = current_dir[:root_pos + len(marker)] if root_pos != -1 else current_dir
        self.root = root
        logMsg('root=' + root)
        config_file = os.path.join(root, config_file)

        if not os.path.isfile(config_file):
            conf_path = os.path.join(root, 'conf')
            if not os.path.exists(conf_path):
                os.makedirs(conf_path)
            # The target may live outside ``conf``; make sure its own parent
            # exists so the copy below cannot fail on a missing directory.
            parent_dir = os.path.dirname(config_file)
            if parent_dir and not os.path.exists(parent_dir):
                os.makedirs(parent_dir)
            file_name = 'smartanno_conf.json'
            # Always seed the file that will actually be opened; checking a
            # different path here would leave ``config_file`` missing.
            shutil.copyfile(self.getDefaultResourceFile(file_name + '.bk'), config_file)
            logError('File "' + config_file + '" doesn\'t exist, create '
                     + os.path.abspath(config_file) + ' using default settings.')

    with open(config_file, 'r') as f:
        ConfigReader.configurations = json.load(f)
    ConfigReader.config_file = config_file
    self.setUpDirs()
    self.dumpDefaultRules()
def saveToWhoosh(self, df, dataset_id, overwrite=False):
    """Index the documents of ``dataset_id`` with whoosh for full text search.

    The index is stored in ``<whoosh_root>/<dataset_id>``. A missing index
    directory is created and forces a fresh index; with ``overwrite`` an
    existing directory is wiped and rebuilt; otherwise the existing index is
    opened and the documents are added to it.

    Args:
        df: Not used by this method.
        dataset_id: Dataset whose ``Document`` rows are indexed; also the
            name of the index sub-directory.
        overwrite: Rebuild the index from scratch when True.
    """
    if not os.path.exists(self.whoosh_root):
        os.mkdir(self.whoosh_root)

    index_dir = os.path.join(self.whoosh_root, dataset_id)
    if os.path.exists(index_dir):
        if overwrite:
            shutil.rmtree(index_dir)
            os.mkdir(index_dir)
    else:
        os.mkdir(index_dir)
        logMsg(
            str(os.path.abspath(index_dir)) + ' does not exist, create it to store whoosh index')
        # Nothing to open yet, so a new index has to be created.
        overwrite = True

    schema = Schema(DOC_ID=NUMERIC(stored=True), TEXT=TEXT)
    index = create_in(index_dir, schema) if overwrite else open_dir(index_dir)

    writer = AsyncWriter(index)
    with self.workflow.dao.create_session() as session:
        matching_docs = session.query(Document).filter(
            Document.DATASET_ID == dataset_id)
        for record in matching_docs:
            writer.add_document(DOC_ID=record.DOC_ID, TEXT=record.TEXT)
        writer.commit()
def complete(self):
    """Finish this step and hand control over to the next one.

    Clears the current output first. When no next step is set, a notice is
    printed and nothing else happens. Otherwise the workflow (if any) is
    told the position of the next step, and that step is started.

    Raises:
        TypeError: If ``next_step`` is set but is not a ``Step``.
    """
    clear_output(True)

    if self.next_step is None:
        print("next step hasn't been set.")
        return

    logMsg((self, 'workflow complete'))

    if not isinstance(self.next_step, Step):
        raise TypeError(
            'Type error for ' + self.name
            + "'s next_step. Only Step can be the next_step, where its next_step is "
            + str(type(self.next_step)))

    if self.workflow is not None:
        self.workflow.updateStatus(self.next_step.pos_id)
    self.next_step.start()
def insert(self, pos: int, new_step: Step):
    """Insert ``new_step`` before the step currently at index ``pos``.

    The new step is attached to this workflow and linked to its neighbours;
    every step from ``pos`` onwards gets its ``pos_id`` and ``name_dict``
    entry refreshed.

    Args:
        pos: Target index. If it is at or beyond the current number of
            steps, ``new_step`` is appended instead. Negative values are
            resolved the way ``list.insert`` resolves them (counted from
            the end, clamped at 0).
        new_step: The step to insert.
    """
    if pos >= len(self):
        logMsg(
            self,
            'insert position is greater than the current length of steps. Append to the end instead.'
        )
        self.append(new_step)
        return

    if pos < 0:
        # Resolve to the real index so the bookkeeping below addresses the
        # element that list.insert actually places.
        pos = max(len(self) + pos, 0)

    self.steps.insert(pos, new_step)
    new_step.setWorkflow(self)

    # The new step and everything after it now sit at a new index.
    for step_id in range(pos, len(self)):
        step = self.steps[step_id]
        step.pos_id = step_id
        self.name_dict[step.name] = step_id

    # Only the links around the new step change; ``pos`` was a valid index
    # before the insert, so a following step always exists.
    if pos > 0:
        before = self.steps[pos - 1]
        before.setNextStep(new_step)
        new_step.setPreviousStep(before)
    after = self.steps[pos + 1]
    new_step.setNextStep(after)
    after.setPreviousStep(new_step)
def append(self, new_step):
    """Add ``new_step`` to the end of the workflow.

    The step receives the next free position id, is attached to this
    workflow, and is registered in ``name_dict``. If the workflow already
    held steps, the former last step and the new one are linked together.

    Args:
        new_step: The step to add.
    """
    tail = self.steps[-1] if len(self.steps) > 0 else None
    position = len(self.steps)

    new_step.pos_id = position
    new_step.setWorkflow(self)
    self.steps.append(new_step)
    self.name_dict[new_step.name] = position

    if tail is None:
        # First step in the workflow: nothing to link to.
        return

    logMsg((self, 'attache new step ' + new_step.name + ' ' + str(position)))
    new_step.setPreviousStep(tail)
    tail.setNextStep(new_step)
def train(self, x, y):
    """Abstract training hook; this base version only logs that it was called.

    Args:
        x: Not used by this base implementation.
        y: Not used by this base implementation.

    Returns:
        None.
    """
    # Original author's note: [] to return Documents, dict() to return
    # grouping information.
    logMsg('error, abstract method called')
    return None