def test_english_detection(self):
    """An article scraped from a Vietnamese news site must not register as English."""
    from translation import Translator
    translator = Translator(None)
    page = scraper.scrape("http://news.google.com/news/url?sa=t&fd=R&usg=AFQjCNFY1KzEAhaiZchzd5ulmoY4_4P8kA&url=http://vov.vn/Van-hoa/NSND-Thanh-Hoa-xuc-dong-hat-truoc-benh-nhan/228256.vov")
    # The page must have been fetched successfully before language detection is meaningful.
    self.assertFalse(page.get('unscrapable'))
    cleaned = process_resources.extract_clean_content(page['htmlContent'])
    self.assertFalse(translator.is_english(cleaned['content']))
def test_english_detection(self):
    """A Vietnamese text snippet must not be classified as English."""
    import config
    from translation import Translator
    translator = Translator(config)
    vietnamese_sample = """ Bệnh viêm não năm nay đã xuất hiện nhiều dấu hiệu bất thường... """
    self.assertFalse(translator.is_english(vietnamese_sample))
def test_english_translation(self):
    """Translating a scraped Spanish article into English must not report an error."""
    import config
    from translation import Translator
    translator = Translator(config)
    page = scraper.scrape("http://peninsulardigital.com/municipios/comondu/refuerzan-acciones-contra-el-dengue/155929")
    cleaned = process_resources.extract_clean_content(page['htmlContent'])
    translation = translator.translate_to_english(cleaned['content'])
    self.assertFalse(translation.get('error'))
def setUp(self):
    # Build a Translator whose internal state is replaced by FakeState so the
    # tests can observe restrict_size() calls; back it with a single empty
    # StenoDictionary wrapped in a StenoDictionaryCollection.
    self.t = Translator()
    self.s = type(self).FakeState()
    self.t._state = self.s
    self.d = StenoDictionary()
    self.dc = StenoDictionaryCollection()
    self.dc.set_dicts([self.d])
    self.t.set_dictionary(self.dc)
class GuiPart:
    """Tkinter window that displays steno translations arriving over a queue
    (fed by a serial-port reader thread) and logs emitted text to a file."""

    def __init__(self, master, queue, endCommand, exportDict, dictType, strokeClass):
        # Initialization for steno-specific actions
        self.translator = Translator(30, exportDict, dictType, strokeClass)
        self.translator.subscribe(self.emitted)
        # NOTE(review): "log.txt" stays open for the object's lifetime and is
        # never explicitly closed -- confirm this is intentional.
        self.translationFile = open("log.txt", "w")
        self.dictType = dictType
        # Keep track of events from the serial port.
        self.queue = queue
        # Set up the GUI
        frame = tkinter.Frame()
        frame.pack( expand = tkinter.YES, fill = tkinter.BOTH )
        frame.master.title( "Plover, The Open Source Steno Program" )
        frame.master.geometry( "950x50" )
        frame.message1 = tkinter.StringVar()
        frame.line1 = tkinter.Label( frame, textvariable = frame.message1 )
        frame.message1.set( "Plover for Gemini PR -- http://plover.stenoknight.com" )
        frame.line1.pack()
        self.message2 = tkinter.StringVar()
        frame.line2 = tkinter.Label( frame, textvariable = self.message2 )
        self.message2.set( "Dictionary Format: %s" % dictType )
        frame.line2.pack()
        frame.pack()

    def processIncoming(self):
        """ Handle all the messages currently in the Queue (if any). """
        while self.queue.qsize():
            try:
                # Process the raw steno from the serial port.
                x = self.queue.get(0)
                self.translator.translate(x)
                self.message2.set(self.translator.fullTranslation())
            except queue.Empty:
                pass

    def emitted(self, translation):
        # A correction rewinds the log file past the previously written word
        # (text plus its trailing space); otherwise the new translation
        # (English if available, raw steno otherwise) is appended.
        if translation.isCorrection:
            tell = self.translationFile.tell()
            if translation.english:
                i = tell - (len(translation.english) + 1)
            else:
                i = tell - (len(translation.rtfcre) + 1)
            self.translationFile.seek(i, 0)
            self.translationFile.truncate()
        else:
            if translation.english:
                out = translation.english
            else:
                out = translation.rtfcre
            self.translationFile.write(out + ' ')
            self.translationFile.flush()
def test_translate_calls_translate_stroke(self):
    """translate() must forward the stroke plus its own state/dictionary/output."""
    t = Translator()
    s = stroke('S')

    def verify(stroke, state, dictionary, output):
        # Called in place of _translate_stroke; assert the exact arguments.
        self.assertEqual(stroke, s)
        self.assertEqual(state, t._state)
        self.assertEqual(dictionary, t._dictionary)
        self.assertEqual(output, t._output)

    with patch('plover.translation._translate_stroke', verify):
        t.translate(s)
class TranslatorStateSizeTestCase(unittest.TestCase):
    """Verifies that the translator restricts its state-buffer size based on
    the longest dictionary key and the configured minimum undo length."""

    class FakeState(_State):
        # Records every restrict_size() request instead of actually resizing.
        def __init__(self):
            _State.__init__(self)
            self.restrict_calls = []

        def restrict_size(self, n):
            self.restrict_calls.append(n)

    def assert_size_call(self, size):
        # The most recent restrict_size() call must have requested `size`.
        self.assertEqual(self.s.restrict_calls[-1], size)

    def assert_no_size_call(self):
        self.assertEqual(self.s.restrict_calls, [])

    def clear(self):
        self.s.restrict_calls = []

    def setUp(self):
        # Translator backed by FakeState so restrict_size() calls are observable.
        self.t = Translator()
        self.s = type(self).FakeState()
        self.t._state = self.s
        self.d = StenoDictionary()
        self.dc = StenoDictionaryCollection()
        self.dc.set_dicts([self.d])
        self.t.set_dictionary(self.dc)

    def test_dictionary_update_grows_size1(self):
        self.d[('S', )] = '1'
        self.assert_size_call(1)

    def test_dictionary_update_grows_size4(self):
        self.d[('S', 'PT', '-Z', 'TOP')] = 'hi'
        self.assert_size_call(4)

    def test_dictionary_update_no_grow(self):
        # min_undo_length dominates shorter dictionary keys.
        self.t.set_min_undo_length(4)
        self.assert_size_call(4)
        self.clear()
        self.d[('S', 'T')] = 'nothing'
        self.assert_size_call(4)

    def test_dictionary_update_shrink(self):
        self.d[('S', 'T', 'P', '-Z', '-D')] = '1'
        self.assert_size_call(5)
        self.clear()
        # Adding a shorter key does not trigger a resize.
        self.d[('A', 'P')] = '2'
        self.assert_no_size_call()
        # Removing the longest key shrinks the requested size.
        del self.d[('S', 'T', 'P', '-Z', '-D')]
        self.assert_size_call(2)

    def test_dictionary_update_no_shrink(self):
        # min_undo_length keeps the size from shrinking below it.
        self.t.set_min_undo_length(7)
        self.d[('S', 'T', 'P', '-Z', '-D')] = '1'
        del self.d[('S', 'T', 'P', '-Z', '-D')]
        self.assert_size_call(7)

    def test_translation_calls_restrict(self):
        self.t.translate(stroke('S'))
        self.assert_size_call(0)
class TranslatorStateSizeTestCase(unittest.TestCase):
    """Verifies that the translator restricts its state-buffer size based on
    the longest dictionary key and the configured minimum undo length.
    NOTE(review): this class duplicates an identical test case elsewhere in
    the file -- confirm whether one copy can be removed."""

    class FakeState(_State):
        # Records every restrict_size() request instead of actually resizing.
        def __init__(self):
            _State.__init__(self)
            self.restrict_calls = []

        def restrict_size(self, n):
            self.restrict_calls.append(n)

    def assert_size_call(self, size):
        # The most recent restrict_size() call must have requested `size`.
        self.assertEqual(self.s.restrict_calls[-1], size)

    def assert_no_size_call(self):
        self.assertEqual(self.s.restrict_calls, [])

    def clear(self):
        self.s.restrict_calls = []

    def setUp(self):
        # Translator backed by FakeState so restrict_size() calls are observable.
        self.t = Translator()
        self.s = type(self).FakeState()
        self.t._state = self.s
        self.d = StenoDictionary()
        self.dc = StenoDictionaryCollection()
        self.dc.set_dicts([self.d])
        self.t.set_dictionary(self.dc)

    def test_dictionary_update_grows_size1(self):
        self.d[('S',)] = '1'
        self.assert_size_call(1)

    def test_dictionary_update_grows_size4(self):
        self.d[('S', 'PT', '-Z', 'TOP')] = 'hi'
        self.assert_size_call(4)

    def test_dictionary_update_no_grow(self):
        # min_undo_length dominates shorter dictionary keys.
        self.t.set_min_undo_length(4)
        self.assert_size_call(4)
        self.clear()
        self.d[('S', 'T')] = 'nothing'
        self.assert_size_call(4)

    def test_dictionary_update_shrink(self):
        self.d[('S', 'T', 'P', '-Z', '-D')] = '1'
        self.assert_size_call(5)
        self.clear()
        # Adding a shorter key does not trigger a resize.
        self.d[('A', 'P')] = '2'
        self.assert_no_size_call()
        # Removing the longest key shrinks the requested size.
        del self.d[('S', 'T', 'P', '-Z', '-D')]
        self.assert_size_call(2)

    def test_dictionary_update_no_shrink(self):
        # min_undo_length keeps the size from shrinking below it.
        self.t.set_min_undo_length(7)
        self.d[('S', 'T', 'P', '-Z', '-D')] = '1'
        del self.d[('S', 'T', 'P', '-Z', '-D')]
        self.assert_size_call(7)

    def test_translation_calls_restrict(self):
        self.t.translate(stroke('S'))
        self.assert_size_call(0)
def __init__( self, exportDic, dictType ):
    # Build the single-line Tk window and hook raw keyboard press/release events.
    Frame.__init__( self )
    self.pack( expand = YES, fill = BOTH )
    self.master.title( "Plover, The Open Source Steno Program" )
    self.master.geometry( "950x50" )
    self.message1 = StringVar()
    self.line1 = Label( self, textvariable = self.message1 )
    self.message1.set( "Plover for SideWinder X4 -- http://plover.stenoknight.com" )
    self.line1.pack()
    self.message2 = StringVar()
    self.line2 = Label( self, textvariable = self.message2 )
    self.message2.set( "Dictionary Format: %s" % dictType )
    self.line2.pack()
    self.master.bind( "<KeyPress>", self.keyPressed )
    self.master.bind( "<KeyRelease>", self.keyReleased )
    # Initialization for steno-specific actions
    self.translator = Translator(30, exportDic, dictType, sidewinder.Stroke)
    self.translator.subscribe(self.emitted)
    # Chords are detected by comparing the pressed and released key sets.
    self.downKeys = []
    self.releasedKeys = []
    # NOTE(review): "log.txt" stays open for the widget's lifetime and is
    # never explicitly closed -- confirm this is intentional.
    self.translationFile = open("log.txt", "w")
    self.dictType = dictType
def __init__(self, master, queue, endCommand, exportDict, dictType, strokeClass):
    # Initialization for steno-specific actions
    self.translator = Translator(30, exportDict, dictType, strokeClass)
    self.translator.subscribe(self.emitted)
    # NOTE(review): "log.txt" stays open for the object's lifetime and is
    # never explicitly closed -- confirm this is intentional.
    self.translationFile = open("log.txt", "w")
    self.dictType = dictType
    # Keep track of events from the serial port.
    self.queue = queue
    # Set up the GUI
    frame = tkinter.Frame()
    frame.pack( expand = tkinter.YES, fill = tkinter.BOTH )
    frame.master.title( "Plover, The Open Source Steno Program" )
    frame.master.geometry( "950x50" )
    frame.message1 = tkinter.StringVar()
    frame.line1 = tkinter.Label( frame, textvariable = frame.message1 )
    frame.message1.set( "Plover for Gemini PR -- http://plover.stenoknight.com" )
    frame.line1.pack()
    self.message2 = tkinter.StringVar()
    frame.line2 = tkinter.Label( frame, textvariable = self.message2 )
    self.message2.set( "Dictionary Format: %s" % dictType )
    frame.line2.pack()
    frame.pack()
def run(self):
    """Install a Translator, bind its language to this object's, then start the app."""
    translator = Translator()
    self.translator = translator
    translator.compile_languages()
    translator.language = self.language
    # Keep the translator's language in sync with this object's `language`.
    self.bind(language=translator.setter('language'))
    # Expose the gettext-style "_" name globally (e.g. for kv files).
    global_idmap['_'] = translator
    super().run()
def _translation(self):
    """Translate the left pane's text block-by-block into the right pane."""
    result = []
    text = self.left_text.get('1.0', END)
    # BUG FIX: `_split_to_blocks` was referenced without calling it, so the
    # `for` loop iterated over the bound method object itself (a TypeError)
    # instead of the blocks of text.  Call it to obtain the iterable.
    # NOTE(review): if `_split_to_blocks` is actually a @property, revert this;
    # also confirm whether it should receive `text` (currently unused).
    blocks = self._split_to_blocks()
    for block in blocks:
        # Translator appends its output for `block` into `result`.
        Translator(block, result)
    self.right_text.delete('1.0', END)
    if not result:
        return
    for item in result:
        self.right_text.insert(INSERT, item)
def main(path="in.p2h"):
    """Translate *path* and return the result.

    Generalized: the input filename used to be hard-coded; it is now a
    parameter whose default preserves the original behavior.  Also renamed
    the local that shadowed the (Python 2) builtin ``file``.

    Args:
        path (str): source file handed to Translator.

    Returns:
        Whatever Translator.translate() produces for the file.
    """
    translator = Translator(path)
    return translator.translate()
def test_listeners(self):
    # Each listener receives (undo, do, prev) for every translation.  This
    # test checks add/remove semantics, including that adding the same
    # listener twice does not produce duplicate notifications.
    output1 = []

    def listener1(undo, do, prev):
        output1.append((undo, do, prev))

    output2 = []

    def listener2(undo, do, prev):
        output2.append((undo, do, prev))

    t = Translator()
    s = Stroke('S')
    tr = Translation([s], StenoDictionary())
    expected_output = [([], [tr], tr)]
    t.translate(s)
    t.add_listener(listener1)
    t.translate(s)
    self.assertEqual(output1, expected_output)
    del output1[:]
    t.add_listener(listener2)
    t.translate(s)
    self.assertEqual(output1, expected_output)
    self.assertEqual(output2, expected_output)
    del output1[:]
    del output2[:]
    # Re-adding listener2 must not double its notifications.
    t.add_listener(listener2)
    t.translate(s)
    self.assertEqual(output1, expected_output)
    self.assertEqual(output2, expected_output)
    del output1[:]
    del output2[:]
    t.remove_listener(listener1)
    t.translate(s)
    self.assertEqual(output1, [])
    self.assertEqual(output2, expected_output)
    del output1[:]
    del output2[:]
    t.remove_listener(listener2)
    t.translate(s)
    self.assertEqual(output1, [])
    self.assertEqual(output2, [])
def test_translator(self):
    # It's not clear that this test is needed anymore. There are separate
    # tests for _translate_stroke and test_translate_calls_translate_stroke
    # makes sure that translate calls it properly. But since I already wrote
    # this test I'm going to keep it.

    class Output(object):
        # Minimal listener mirroring what the formatter would render:
        # undo pops entries, do appends english (or raw strokes joined by '/').
        def __init__(self):
            self._output = []

        def write(self, undo, do, prev):
            for t in undo:
                self._output.pop()
            for t in do:
                if t.english:
                    self._output.append(t.english)
                else:
                    self._output.append('/'.join(t.rtfcre))

        def get(self):
            return ' '.join(self._output)

        def clear(self):
            del self._output[:]

    d = StenoDictionary()
    out = Output()
    t = Translator()
    dc = StenoDictionaryCollection()
    dc.set_dicts([d])
    t.set_dictionary(dc)
    t.add_listener(out.write)

    # With an empty dictionary, strokes echo as raw steno; '*' undoes.
    t.translate(stroke('S'))
    self.assertEqual(out.get(), 'S')
    t.translate(stroke('T'))
    self.assertEqual(out.get(), 'S T')
    t.translate(stroke('*'))
    self.assertEqual(out.get(), 'S')
    t.translate(stroke('*'))
    self.assertEqual(out.get(), 'S')  # Undo buffer ran out.

    t.set_min_undo_length(3)
    out.clear()
    t.translate(stroke('S'))
    self.assertEqual(out.get(), 'S')
    t.translate(stroke('T'))
    self.assertEqual(out.get(), 'S T')
    t.translate(stroke('*'))
    self.assertEqual(out.get(), 'S')
    t.translate(stroke('*'))
    self.assertEqual(out.get(), '')
    t.translate(stroke('*'))
    self.assertEqual(out.get(), '')  # Undo buffer ran out.

    out.clear()
    # With real entries, longest-match translations replace shorter ones.
    d[('S',)] = 't1'
    d[('T',)] = 't2'
    d[('S', 'T')] = 't3'
    t.translate(stroke('S'))
    self.assertEqual(out.get(), 't1')
    t.translate(stroke('T'))
    self.assertEqual(out.get(), 't3')
    t.translate(stroke('T'))
    self.assertEqual(out.get(), 't3 t2')
    t.translate(stroke('S'))
    self.assertEqual(out.get(), 't3 t2 t1')
    t.translate(stroke('*'))
    self.assertEqual(out.get(), 't3 t2')
    t.translate(stroke('*'))
    self.assertEqual(out.get(), 't3')
    t.translate(stroke('*'))
    self.assertEqual(out.get(), 't1')
    t.translate(stroke('*'))
    self.assertEqual(out.get(), '')
    t.translate(stroke('S'))
    self.assertEqual(out.get(), 't1')
    t.translate(stroke('T'))
    self.assertEqual(out.get(), 't3')
    t.translate(stroke('T'))
    self.assertEqual(out.get(), 't3 t2')
    # Dictionary entries added mid-stream are applied retroactively.
    d[('S', 'T', 'T')] = 't4'
    d[('S', 'T', 'T', 'S')] = 't5'
    t.translate(stroke('S'))
    self.assertEqual(out.get(), 't5')
    t.translate(stroke('*'))
    self.assertEqual(out.get(), 't3 t2')
    t.translate(stroke('*'))
    self.assertEqual(out.get(), 't3')
    t.translate(stroke('T'))
    self.assertEqual(out.get(), 't4')
    t.translate(stroke('S'))
    self.assertEqual(out.get(), 't5')
    t.translate(stroke('S'))
    self.assertEqual(out.get(), 't5 t1')
    t.translate(stroke('*'))
    self.assertEqual(out.get(), 't5')
    t.translate(stroke('*'))
    self.assertEqual(out.get(), 't4')
    t.translate(stroke('*'))
    self.assertEqual(out.get(), 't3')
    t.translate(stroke('*'))
    self.assertEqual(out.get(), 't1')
    t.translate(stroke('*'))
    self.assertEqual(out.get(), '')
    d.clear()
    s = stroke('S')
    t.translate(s)
    t.translate(s)
    t.translate(s)
    t.translate(s)
    s = stroke('*')
    t.translate(s)
    t.translate(s)
    t.translate(s)
    t.translate(s)
    self.assertEqual(out.get(), 'S')  # Not enough undo to clear output.
    out.clear()
    t.remove_listener(out.write)
    t.translate(stroke('S'))
    self.assertEqual(out.get(), '')
class BaseState(object):
    """
    This class is a parent-class for all state handlers, it provides:
      - interface to Google Translations API
      - interface to our own phrases/responses
      - automatically translates our strings to the language of the user
      - automatically adds pictures to the chosen intents/conversation steps/questions
      - interface to the database
      - interface to the NLU (Natural Language Understanding unit - https://github.com/HumanbiOS/rasa)
      - automatic requests queueing
      - automatic database updates for the `User` object

    Note 0: In the text there are some special words:
        $(name) - refers to the random meaningful (or not) string that came to your mind
        $(root) - refers to the project directory

    Note 1: Server will pick up files and extract state classes from them, you don't
        need to worry about "registering state", there is no hardcoded list.
        The important detail is that you **must** put state.py with the state handler
        to the $(root)/fsm/states folder.

    Note 2: It's a better practise to put each state handler in its own file.

    Naming Conventions:
      - name of the python class - `$(name)State`
            Example: ` class MyBeautifulState:`
                     ` class BasicQuestionsState:`
      - name of the file - *snake lower case* of $(name). "state" might be omitted
            (filename matters only to avoid confusions, refer to note 1)
            Example: `my_beautiful_state.py`
                     `basic_questions.py`
    """
    HEADERS = {"Content-Type": "application/json"}
    # This variable allows to ignore `entry()` when needed
    has_entry = True
    # @Important: instantiate important classes (shared by all state instances)
    tr = Translator()
    db = Database()
    nlu = NLUWorker(tr)
    STRINGS = Strings(tr, db)
    # Data buffer that is assigned to when the class is initialized, stores reference to the relevant Botsociety Data
    bots_data = None
    # Media path and folder (created at class-definition time if missing)
    media_folder = "media"
    media_path = os.path.join(ROOT_PATH, media_folder)
    if not os.path.exists(media_path):
        os.mkdir(media_path)

    # Prepare state
    def __init__(self):
        # Keeps list of tasks
        self.tasks = list()
        self.random_tasks = list()
        # Keeps execution queue
        self.execution_queue = list()
        # Create language variable
        self.__language = None
        self.strings = None

    def set_language(self, value: str):
        """
        This method sets language to a current state
        If language is None - base language version is english

        Args:
            value (str): language code of the user's country
        """
        self.__language = value or "en"
        self.strings = StringAccessor(self.__language, self.STRINGS)

    async def wrapped_entry(self, context: Context, user: User):
        """
        This method is executed when user enters State for the first time, if
        `has_entry` variable is set to True.  It is a wrapper for the
        state-author-controlled `entry` method.

        Args:
            context (Context): holds parsed and verified request, with auto-filled default values
            user (User): user object that is stored directly in the database
        """
        # Set language
        self.set_language(user['language'])
        # Wrap base method to avoid breaking server
        try:
            # Execute state method
            status = await self.entry(context, user, self.db)
        except Exception as e:
            # Do not commit to database if something went wrong
            status = OK(commit=False)
            # Log exception
            logging.exception(e)
        # Commit changes to database
        if status.commit:
            await self.db.commit_user(user=user)
        # @Important: Fulfill text promises
        if self.strings.promises:
            await self.strings.fill_promises()
        # @Important: Since we call this always, check if the call is actually needed
        if self.tasks:
            # @Important: collect all requests
            _results = await self._collect()
        # @Important: Execute all queued jobs
        if self.execution_queue:
            await self._execute_tasks()
        return status

    async def wrapped_process(self, context: Context, user: User):
        """
        This method is executed when user enters State for second or any
        consequent time, or for the first time if `has_entry` variable is set
        to False.  It is a wrapper for the state-author-modified `process` method.

        Args:
            context (Context): holds parsed and verified request, with auto-filled default values
            user (User): user object that is stored directly in the database
        """
        # Set language
        self.set_language(user['language'])
        # Wrap base method to avoid breaking server
        try:
            # Execute state method
            status = await self.process(context, user, self.db)
        except Exception as e:
            # Do not commit to database if something went wrong
            status = OK(commit=False)
            # Log exception
            logging.exception(e)
        # Commit changes to database
        if status.commit:
            await self.db.commit_user(user=user)
        # @Important: Fulfill text promises
        if self.strings.promises:
            await self.strings.fill_promises()
        # @Important: Since we call this always, check if the call is actually needed
        if self.tasks:
            # @Important: collect all requests
            await self._collect()
        # @Important: Execute all queued jobs
        if self.execution_queue:
            await self._execute_tasks()
        return status

    # Actual state method to be written for the state
    async def entry(self, context: Context, user: User, db):
        """
        The method handles each interaction when user enters your state

        Args:
            context (Context): context object of the request
            user (User): user object from database corresponding to the user who sent message
            db (Database): database wrapper object
        """
        return OK

    # Actual state method to be written for the state
    async def process(self, context: Context, user: User, db):
        """
        The method handles each interaction with user (except first interaction)

        Args:
            context (Context): context object of the request
            user (User): user object from database corresponding to the user who sent message
            db (Database): database wrapper object
        """
        return OK

    def parse_button(self, raw_text: str, truncated=False, truncation_size=20, verify=None) -> Button:
        """
        Function compares input text to all available strings (of user's language)
        and if finds matching - returns Button object, which has text and key
        attributes, where text is raw_text and key is a key of matched string
        from strings.json

        Args:
            raw_text (str): just user's message
            truncated (bool): option to look for not full matches (only first `n`
                characters). Defaults to False.
            truncation_size (int): number of sequential characters to match. Defaults to 20.
            verify (list, set): a custom object that is used instead of global language
                object (e.g. you want a match from the list of specific buttons)
        """
        btn = Button(raw_text)
        lang_obj = self.STRINGS.cache.get(self.__language)
        # Make sure that certain language file exists
        # NOTE(review): if the language file is missing, lang_obj stays None and
        # the loop below raises TypeError -- confirm that cannot happen here.
        if lang_obj and verify:
            lang_obj = [(key, lang_obj[key]) for key in verify]
        elif lang_obj:
            lang_obj = lang_obj.items()
        for k, v in lang_obj:
            if v == raw_text or (truncated and len(v) > truncation_size and v[:truncation_size] == raw_text[:truncation_size]):
                # [DEBUG]
                # logging.info(value)
                btn.set_key(k)
                break
        return btn

    # Parse intent of single-formatted string, comparing everything but inserted
    # Returns True -> intent matched
    # Returns None or False -> intent didn't match
    def parse_fstring(self, raw_text: str, promise: TextPromise, item1: str = "{", item2: str = "}"):
        """
        Method lets you compare "filled" f-string with "un-filled" one to identify
        intent, which is not possible with simple `==` comparison, because the
        f-string is *actually* "filled".  Compares sub-strings to the "{" char and
        after the "}" char, exclusively.

        Args:
            raw_text (str): raw input, which is "filled" string
            promise (TextPromise): cached input, "un-filled" string
            item1 (str): object to compare from start to position of it in the strings *exclusively*
            item2 (str): object to compare from its position to the end of the strings *exclusively*
        """
        if promise.value and isinstance(promise.value, str):
            # Find where "{" or "}" should've been, then use it to go one char left or right, accordingly
            i1 = promise.value.find(item1)
            i2 = promise.value.find(item2)
            if i1 != -1 and i2 != -1:
                # Find from the end, so can use negative index
                # Can't just measure from the start, because there will be inserted text of random length
                i2 = len(promise.value) - i2
                return raw_text[:i1] == promise.value[:i1] and raw_text[-i2 + 1:] == promise.value[-i2 + 1:]

    # @Important: easy method to prepare context
    def set_data(self, context: Context, question: dict, avoid_buttons: list = None):
        # Change value from None to empty list for the "in" operator
        avoid_buttons = avoid_buttons or []
        # Set according text
        context['request']['message']['text'] = self.strings[question["text_key"]]
        # Always have buttons
        context['request']['has_buttons'] = True
        context['request']['buttons_type'] = "text"
        # If not a free question -> add it's buttons
        if not question["free_answer"]:
            context['request']['buttons'] = [
                {"text": self.strings[button["text_key"]]} for button in question["buttons"]
                if button["text_key"] not in avoid_buttons
            ]
        else:
            context['request']['buttons'] = []
        # Always add edge buttons
        context['request']['buttons'] += [{
            "text": self.strings['back']
        }, {
            "text": self.strings['stop']
        }]
        # Add file if needed
        media = question.get('image')
        if media:
            context['request']['has_file'] = True
            context['request']['file'] = [{"payload": media}]

    # @Important: 1) find better way with database
    # @Important: 2) What if we do it in non blocking asyncio.create_task (?)
    # @Important: But on the other hand, we can't relay on the file status
    # @Important: for example if next call needs to upload it somewhere
    # @Important: If you deal with reliability and consistency - great optimisation
    async def download_by_url(self, url, *folders, filename):
        """
        Downloads any file to the given directory with given filename from the url,
        in asynchronous way (not-blocking-ish).
        """
        # TODO: Use async executor for real non-blocking?
        # TODO: Or, do we really need this method?
        # Make sure file exists
        if not self.exists(*folders):
            # Create folder on the path
            os.mkdir(os.path.join(self.media_path, *folders))
        # Full file path with filename
        filepath = os.path.join(self.media_path, *folders, filename)
        # Initiate aiohttp sessions, get file
        async with ClientSession() as session:
            async with session.get(url) as response:
                # Open file with aiofiles and start steaming bytes, write to the file
                logging.debug(f"Downloading file: {url} to {filepath}")
                async with aiofiles.open(filepath, 'wb') as f:
                    async for chunk in response.content.iter_any():
                        await f.write(chunk)
                logging.debug(f"Finished download [{filepath}]")
        return filepath

    # @Important: check if downloaded file exist
    def exists(self, *args):
        """
        Checks for the file in the passed directory/filepath, shortcut
        for the os `exists` and `join` methods
        """
        return os.path.exists(os.path.join(self.media_path, *args))

    # @Important: high level access to translation module
    # @Important: note, though, that we shouldn't abuse translation api
    # @Important: because a) it's not good enough, b) it takes time to make
    # @Important: a call to the google cloud api
    async def translate(self, text: str, target: str) -> str:
        """
        Method is wrapper for translation_text from translation module.
        Simply returns translated text for the target language.
        Good usage example if translating text between users.

        Args:
            text (str): message to translate
            target (str): target language (ISO 639-1 code)
        """
        return await self.tr.translate_text(text, target)

    # @Important: command to actually send all collected requests from `process` or `entry`
    async def _collect(self):
        results = list()
        async with ClientSession(json_serialize=lambda o: json.dumps(o, cls=PromisesEncoder)) as session:
            # @Important: Since asyncio.gather order is not preserved, we don't want to run them concurrently
            # @Important: so, gather tasks that were tagged with "allow_gather".
            # @Important: Group tasks by the value of "size" for the sake of not hitting front end too hard
            size = 30
            for coeff in range(len(self.random_tasks[::size])):
                results.extend(await asyncio.gather(
                    *(self._send(r_task, session) for r_task in self.random_tasks[size * coeff:size * (coeff + 1)])
                ))
            # Send ordinary tasks
            for each_task in self.tasks:
                res = await self._send(each_task, session)
                results.append(res)
        return results

    # @Important: Real send method, takes SenderTask as argument
    async def _send(self, task: SenderTask, session: ClientSession):
        # Takes instance data holder object with the name from the tokens storage, extracts url
        url = tokens[task.service].url
        # Unpack context, set headers (content-type: json)
        async with session.post(url, json=task.context, headers=self.HEADERS) as resp:
            # If reached server - log response
            if resp.status == 200:
                pass
                # [DEBUG]
                # result = await resp.json()
                # if result:
                #     logging.info(f"Sending task status: {result}")
                #     return result
                # else:
                #     logging.info(f"Sending task status: No result")
            # Otherwise - log error
            else:
                logging.error(
                    f"[ERROR]: Sending task (service={task.service}, context={task.context}) status {await resp.text()}"
                )

    # @Important: `send` METHOD THAT ALLOWS TO SEND PAYLOAD TO THE USER
    def send(self, to_entity: Union[User, str], context: Context, allow_gather=False):
        """
        Method creates task that sends context['request'] to the to_user User
        after executing your code inside state.

        Args:
            to_entity (User, str): user object to send message to, or just service name
            context (Context): request context that is send to the user. The object is
                deep copied so it can't be changed further in code (reliable consistency
                for multiple requests)
        """
        # @Important: [Explanation to the code below]:
        # @Important: maybe add some queue of coroutines and dispatch them all when handler return status (?)
        # @Important: or just dispatch them via asyncio.create_task so it will be more efficient (?)
        # @Important: reasoning:
        # @Important: simple way: server -> request1 -> status1 -> request2 -> status2 -> request3 -> status3
        # @Important: this way: server -> gather(request1, request2, request3) -> log(status1, status2, status3)
        if isinstance(to_entity, str):
            service = to_entity
        else:
            service = to_entity['via_instance']
        task = SenderTask(service, copy.deepcopy(context.__dict__['request']))
        if allow_gather:
            self.random_tasks.append(task)
        else:
            self.tasks.append(task)

    async def _execute_tasks(self):
        # Run every queued ExecutionTask concurrently and return their results.
        results = await asyncio.gather(*[
            exec_task.func(*exec_task.args, **exec_task.kwargs)
            for exec_task in self.execution_queue
        ])
        return results

    def create_task(self, func, *args, **kwargs):
        """
        Method executes async function (with given args and kwargs) immediately
        after processing state.

        Args:
            func (Async Func): function to be executed
            args (Any): args to be passed into the func
            kwargs (Any): kwargs to be passed into the func
        """
        self.execution_queue.append(ExecutionTask(func, args, kwargs))

    def create_conversation(self, user1: User, user2: User, context: Context, message: Optional[str] = None) -> None:
        # Link the two users to each other so the router can forward messages.
        user1['context']['conversation'] = {
            "user_id": user2['user_id'],
            "via_instance": user2['via_instance'],
            "type": user2['type']
        }
        user2['context']['conversation'] = {
            "user_id": user1['user_id'],
            "via_instance": user1['via_instance'],
            "type": user1['type']
        }
        # Send message to them (message=None -> default greeting; message="" suppresses it)
        if message or message is None:
            if message is None:
                message = "You've just started realtime conversation. Just start typing to talk to them!"
            context['request']['user']['user_id'] = 1
            context['request']['user']['first_name'] = "HumanBios"
            context['request']['chat']['chat_id'] = user1['user_id']
            context['request']['message']['text'] = message
            self.send(user1, context)
            context['request']['chat']['chat_id'] = user2['user_id']
            self.send(user2, context)
        user1['states'].append("ConversationState")
        user2['states'].append("ConversationState")
def test_listeners(self):
    # Variant of the listener test where Translation carries no dictionary.
    # Checks add/remove semantics, including that double-adding a listener
    # does not duplicate notifications.
    output1 = []

    def listener1(undo, do, prev):
        output1.append((undo, do, prev))

    output2 = []

    def listener2(undo, do, prev):
        output2.append((undo, do, prev))

    t = Translator()
    s = stroke('S')
    tr = Translation([s], None)
    expected_output = [([], [tr], tr)]
    t.translate(s)
    t.add_listener(listener1)
    t.translate(s)
    self.assertEqual(output1, expected_output)
    del output1[:]
    t.add_listener(listener2)
    t.translate(s)
    self.assertEqual(output1, expected_output)
    self.assertEqual(output2, expected_output)
    del output1[:]
    del output2[:]
    # Re-adding listener2 must not double its notifications.
    t.add_listener(listener2)
    t.translate(s)
    self.assertEqual(output1, expected_output)
    self.assertEqual(output2, expected_output)
    del output1[:]
    del output2[:]
    t.remove_listener(listener1)
    t.translate(s)
    self.assertEqual(output1, [])
    self.assertEqual(output2, expected_output)
    del output1[:]
    del output2[:]
    t.remove_listener(listener2)
    t.translate(s)
    self.assertEqual(output1, [])
    self.assertEqual(output2, [])
def test_changing_state(self):
    # get_state/set_state must snapshot and restore translation history so
    # that identical strokes reproduce identical (undo, do, prev) events.
    output = []

    def listener(undo, do, prev):
        output.append((undo, do, prev))

    d = StenoDictionary()
    d[('S', 'P')] = 'hi'
    dc = StenoDictionaryCollection()
    dc.set_dicts([d])
    t = Translator()
    t.set_dictionary(dc)
    t.translate(stroke('T'))
    t.translate(stroke('S'))
    s = copy.deepcopy(t.get_state())
    t.add_listener(listener)
    expected = [([Translation([stroke('S')], None)],
                 [Translation([stroke('S'), stroke('P')], 'hi')],
                 Translation([stroke('T')], None))]
    t.translate(stroke('P'))
    self.assertEqual(output, expected)
    del output[:]
    # Restoring the saved state must reproduce the same event.
    t.set_state(s)
    t.translate(stroke('P'))
    self.assertEqual(output, expected)
    del output[:]
    # With cleared state, 'P' alone matches no dictionary entry.
    t.clear_state()
    t.translate(stroke('P'))
    self.assertEqual(output, [([], [Translation([stroke('P')], None)], None)])
    del output[:]
    # `s` was mutated by the earlier set_state+translate, so prev differs now.
    t.set_state(s)
    t.translate(stroke('P'))
    self.assertEqual(output,
                     [([], [Translation([stroke('P')], None)],
                       Translation([stroke('S'), stroke('P')], 'hi'))])
# Unpack the CLI options, parse the CellDesigner file, and translate every
# process into a BCSL rule written to the output file.
output_path = args.Out_path
unpack_complexes = args.unpack_complexes
unpack_nested = args.unpack_nested_complexes
include_influences = args.include_positive_influences
quiet = args.quiet
replace_spaces = args.replace_non_word_chars
include_rates = args.rates
# Unpacking nested complexes only makes sense when complexes are unpacked at all.
if unpack_nested:
    unpack_complexes = True

cp = CellDesignerParser(input_path)
cp.generate_warnings = not quiet
cp.parse_tree()

tsr = Translator()
tsr.unpack_complexes = unpack_complexes
tsr.unpack_nested_complexes = unpack_nested
tsr.generate_warnings = not quiet
tsr.replace_spaces = replace_spaces

# Translate every parsed CellDesigner process into a BCSL rule.
rules_list = [tsr.sbgn_transition_to_bcsl_rule(process) for process in cp.process_list]

# BUG FIX: the output file was opened but never closed/flushed, so output
# could be lost on interpreter teardown or error; a context manager
# guarantees the file is closed.
with open(output_path, "w") as out_file:
    for rule in rules_list:
        out_file.write(rule.__str__(include_modifiers=include_influences,
                                    include_artificial_rates=include_rates))
def setUp(self):
    # Older-API variant: the dictionary is passed to the translator directly,
    # without a StenoDictionaryCollection wrapper.  FakeState replaces the
    # internal state so restrict_size() calls can be observed.
    self.t = Translator()
    self.s = type(self).FakeState()
    self.t._state = self.s
    self.d = StenoDictionary()
    self.t.set_dictionary(self.d)
def test_changing_state(self):
    # Older-API variant: dictionary passed directly, and Translation's second
    # argument is the dictionary rather than the translated text.
    output = []

    def listener(undo, do, prev):
        output.append((undo, do, prev))

    d = StenoDictionary()
    d[('S', 'P')] = 'hi'
    t = Translator()
    t.set_dictionary(d)
    t.translate(Stroke('T'))
    t.translate(Stroke('S'))
    s = copy.deepcopy(t.get_state())
    t.add_listener(listener)
    expected = [([Translation([Stroke('S')], d)],
                 [Translation([Stroke('S'), Stroke('P')], d)],
                 Translation([Stroke('T')], d))]
    t.translate(Stroke('P'))
    self.assertEqual(output, expected)
    del output[:]
    # Restoring the saved state must reproduce the same event.
    t.set_state(s)
    t.translate(Stroke('P'))
    self.assertEqual(output, expected)
    del output[:]
    # With cleared state, 'P' alone matches no dictionary entry.
    t.clear_state()
    t.translate(Stroke('P'))
    self.assertEqual(output, [([], [Translation([Stroke('P')], d)], None)])
    del output[:]
    # `s` was mutated by the earlier set_state+translate, so prev differs now.
    t.set_state(s)
    t.translate(Stroke('P'))
    self.assertEqual(output,
                     [([], [Translation([Stroke('P')], d)],
                       Translation([Stroke('S'), Stroke('P')], d))])
class KeyEater( Frame ):
    '''For antighosting qwerty keyboard such as Sidewinder X4'''

    def __init__( self, exportDic, dictType ):
        """Build the two-line Tk status window and wire keyboard events.

        Args:
            exportDic: dictionary object handed to the Translator.
            dictType: dictionary format label, shown in the status line.
        """
        Frame.__init__( self )
        self.pack( expand = YES, fill = BOTH )
        self.master.title( "Plover, The Open Source Steno Program" )
        self.master.geometry( "950x50" )
        # First status line: static banner.
        self.message1 = StringVar()
        self.line1 = Label( self, textvariable = self.message1 )
        self.message1.set( "Plover for SideWinder X4 -- http://plover.stenoknight.com" )
        self.line1.pack()
        # Second status line: live translation output / dictionary format.
        self.message2 = StringVar()
        self.line2 = Label( self, textvariable = self.message2 )
        self.message2.set( "Dictionary Format: %s" % dictType )
        self.line2.pack()
        # Listen to raw key up/down events on the toplevel window.
        self.master.bind( "<KeyPress>", self.keyPressed )
        self.master.bind( "<KeyRelease>", self.keyReleased )
        # Initialization for steno-specific actions
        self.translator = Translator(30, exportDic, dictType, sidewinder.Stroke)
        self.translator.subscribe(self.emitted)
        # Keys currently held down / released during the current chord.
        self.downKeys = []
        self.releasedKeys = []
        # NOTE(review): log file handle is opened here and never closed
        # explicitly; it lives as long as the GUI does.
        self.translationFile = open("log.txt", "w")
        self.dictType = dictType

    def keyPressed( self, event ):
        # Record each pressed key; kept sorted so the chord can be compared
        # order-independently against the released set.
        self.downKeys.append(event.char)
        self.downKeys.sort()

    def keyReleased( self, event ):
        # A steno chord is complete once every pressed key has been released.
        self.releasedKeys.append(event.char)
        self.releasedKeys.sort()
        if self.downKeys == self.releasedKeys:
            try:
                self.translator.translate(self.releasedKeys)
            except KeyError:
                # Unknown chord: drop it and start over.
                self.releasedKeys = []
                self.downKeys = []
            self.message2.set(self.translator.fullTranslation())
            # Reset for the next chord.
            self.downKeys = []
            self.releasedKeys = []

    def emitted(self, translation) :
        """Translator callback: append output to log.txt, or rewind the file
        to erase the previous emission when `translation` is a correction."""
        if translation.isCorrection :
            tell = self.translationFile.tell()
            # Back up over the previously written word plus its trailing space.
            if translation.english :
                i = tell - (len(translation.english) + 1)
            else :
                i = tell - (len(translation.rtfcre) + 1)
            # XXX Possibly the seek problem is here? Raise exception?
            self.translationFile.seek(i, 0)
            self.translationFile.truncate()
        else :
            # Prefer the English translation; fall back to the raw strokes.
            if translation.english :
                out = translation.english
            else :
                out = translation.rtfcre
            self.translationFile.write(out + ' ')
            self.translationFile.flush()
def test_translator(self):
    # It's not clear that this test is needed anymore. There are separate
    # tests for _translate_stroke and test_translate_calls_translate_stroke
    # makes sure that translate calls it properly. But since I already wrote
    # this test I'm going to keep it.

    class Output(object):
        """Minimal listener that mirrors the translator's output stream."""

        def __init__(self):
            self._output = []

        def write(self, undo, do, prev):
            # Remove undone translations, then append the new ones
            # (English if available, otherwise the raw stroke sequence).
            for t in undo:
                self._output.pop()
            for t in do:
                if t.english:
                    self._output.append(t.english)
                else:
                    self._output.append('/'.join(t.rtfcre))

        def get(self):
            return ' '.join(self._output)

        def clear(self):
            del self._output[:]

    d = StenoDictionary()
    out = Output()
    t = Translator()
    t.set_dictionary(d)
    t.add_listener(out.write)

    # --- Untranslated strokes pass through; '*' undoes them. ---
    t.translate(Stroke('S'))
    self.assertEqual(out.get(), 'S')
    t.translate(Stroke('T'))
    self.assertEqual(out.get(), 'S T')
    t.translate(Stroke('*', True))
    self.assertEqual(out.get(), 'S')
    t.translate(Stroke('*', True))
    self.assertEqual(out.get(), 'S')  # Undo buffer ran out.

    # --- With min undo length 3, the buffer holds one more undo. ---
    t.set_min_undo_length(3)
    out.clear()
    t.translate(Stroke('S'))
    self.assertEqual(out.get(), 'S')
    t.translate(Stroke('T'))
    self.assertEqual(out.get(), 'S T')
    t.translate(Stroke('*', True))
    self.assertEqual(out.get(), 'S')
    t.translate(Stroke('*', True))
    self.assertEqual(out.get(), '')
    t.translate(Stroke('*', True))
    self.assertEqual(out.get(), '')  # Undo buffer ran out.

    # --- Dictionary entries: longest-match wins, undo retranslates. ---
    out.clear()
    d[('S', )] = 't1'
    d[('T', )] = 't2'
    d[('S', 'T')] = 't3'
    t.translate(Stroke('S'))
    self.assertEqual(out.get(), 't1')
    t.translate(Stroke('T'))
    self.assertEqual(out.get(), 't3')
    t.translate(Stroke('T'))
    self.assertEqual(out.get(), 't3 t2')
    t.translate(Stroke('S'))
    self.assertEqual(out.get(), 't3 t2 t1')
    t.translate(Stroke('*', True))
    self.assertEqual(out.get(), 't3 t2')
    t.translate(Stroke('*', True))
    self.assertEqual(out.get(), 't3')
    t.translate(Stroke('*', True))
    self.assertEqual(out.get(), 't1')
    t.translate(Stroke('*', True))
    self.assertEqual(out.get(), '')
    t.translate(Stroke('S'))
    self.assertEqual(out.get(), 't1')
    t.translate(Stroke('T'))
    self.assertEqual(out.get(), 't3')
    t.translate(Stroke('T'))
    self.assertEqual(out.get(), 't3 t2')

    # --- Longer entries added mid-stream absorb earlier translations. ---
    d[('S', 'T', 'T')] = 't4'
    d[('S', 'T', 'T', 'S')] = 't5'
    t.translate(Stroke('S'))
    self.assertEqual(out.get(), 't5')
    t.translate(Stroke('*', True))
    self.assertEqual(out.get(), 't3 t2')
    t.translate(Stroke('*', True))
    self.assertEqual(out.get(), 't3')
    t.translate(Stroke('T'))
    self.assertEqual(out.get(), 't4')
    t.translate(Stroke('S'))
    self.assertEqual(out.get(), 't5')
    t.translate(Stroke('S'))
    self.assertEqual(out.get(), 't5 t1')
    t.translate(Stroke('*', True))
    self.assertEqual(out.get(), 't5')
    t.translate(Stroke('*', True))
    self.assertEqual(out.get(), 't4')
    t.translate(Stroke('*', True))
    self.assertEqual(out.get(), 't3')
    t.translate(Stroke('*', True))
    self.assertEqual(out.get(), 't1')
    t.translate(Stroke('*', True))
    self.assertEqual(out.get(), '')

    # --- An emptied dictionary leaves raw strokes; undo can't clear all. ---
    d.clear()
    t.translate(Stroke('S'))
    t.translate(Stroke('S'))
    t.translate(Stroke('S'))
    t.translate(Stroke('S'))
    t.translate(Stroke('*', True))
    t.translate(Stroke('*', True))
    t.translate(Stroke('*', True))
    t.translate(Stroke('*', True))
    self.assertEqual(out.get(), 'S')  # Not enough undo to clear output.

    # --- Removed listeners receive nothing. ---
    out.clear()
    t.remove_listener(out.write)
    t.translate(Stroke('S'))
    self.assertEqual(out.get(), '')
async def main(languages: list):
    """Translate the English source strings into every requested language
    and persist each result."""
    translator = Translator()
    source_strings = get_english_strings()
    # One translation dict per requested language, in the same order.
    translations = await translator.translate_multiple_languages_dict(
        languages, source_strings)
    for language, translated in zip(languages, translations):
        put_translated_strings(language, translated)
def main():
    """CLI entry point: train a DE->EN seq2seq model, verify input data,
    or run an interactive prediction loop against a trained checkpoint."""
    # Instantiate parser
    parser = ArgumentParser()
    # Add arguments
    parser.add_argument("--mode", "-m", help="Whether to 'train' or to 'predict'", required=True)
    parser.add_argument("--prediction_scheme",
                        help="Whether to use a beam search ('beam'), a greedy search ('greedy')"
                             " or a one-time lookup ('one_time')")
    parser.add_argument("--data", "-d", help="The path to the data in TFRecords format")
    parser.add_argument("--de_vocab", help="The GERMAN vocabulary list", required=True)
    parser.add_argument("--en_vocab", help="The ENGLISH vocabulary list", required=True)
    parser.add_argument("--pre_trained_embedding_de", help="The pre-trained GERMAN embeddings (.npy)")
    parser.add_argument("--pre_trained_embedding_en", help="The pre-trained ENGLISH embeddings (.npy)")
    parser.add_argument("--config", "-c", help="The YAML file that defines the model/training", required=True)
    parser.add_argument("--temp", "-t", help="Where to store temporary files by TensorFlow",
                        default=tempfile.gettempdir())
    parser.add_argument("--verify", "-v", help="Whether to print the input data for verification purposes",
                        default=False, action="store_true")
    parser.add_argument("--model_dir", help="Where the checkpoint of a trained TF model resides",
                        required=False)
    parser.add_argument("--debug", help="Whether to activate TensorFlow's debug mode",
                        default=False, action="store_true")
    parser.add_argument("--tensorboard", help="Whether to spawn a TensorBoard daemon",
                        default=False, action="store_true")

    # Parse
    args = parser.parse_args()

    # Input assertions
    assert args.mode in {"train", "predict"}
    assert args.mode == "train" or args.prediction_scheme in ("beam", "greedy", "one_time")
    assert os.path.exists(args.config), "Config does not exist!"

    # Parse config. safe_load avoids arbitrary Python object construction
    # from the YAML file (bare yaml.load without a Loader is unsafe and
    # deprecated), and the context manager closes the handle, which the
    # original version leaked.
    with open(args.config, "r") as config_file:
        config = yaml.safe_load(config_file)
    assert {"batch_size", "num_epochs", "shuffle_buffer_size"} <= set(config.keys())

    # Resolve symbols
    config = resolve_symbols(config)

    # Add vocabulary files (embeddings only if both were supplied)
    if args.pre_trained_embedding_de and args.pre_trained_embedding_en:
        config["pre_trained_embedding_de"] = args.pre_trained_embedding_de
        config["pre_trained_embedding_en"] = args.pre_trained_embedding_en
    config["en_vocab_fname"] = args.en_vocab
    config["de_vocab_fname"] = args.de_vocab
    # Count vocabulary entries (one token per line); close files deterministically.
    with open(config["en_vocab_fname"], "r") as vocab_file:
        config["en_vocab_num_words"] = sum(1 for _ in vocab_file)
    with open(config["de_vocab_fname"], "r") as vocab_file:
        config["de_vocab_num_words"] = sum(1 for _ in vocab_file)

    # Random sub-directory
    if args.mode == "train":
        assert os.path.exists(args.data), "Data does not exist!"
        if args.model_dir:
            # Improve existing model
            model_dir = args.model_dir
        else:
            model_dir = os.path.join(args.temp, "TF_" + "".join(choices(string.ascii_lowercase, k=7)))
            print("Created TF temporary directory: {}".format(model_dir))
    else:
        assert args.model_dir is not None, "You have to specify a valid model directory in prediction mode!"
        model_dir = args.model_dir

    # Get input function
    input_fn = partial(get_input_fn, fname=args.data, config=config)

    if args.verify:
        verify_data(input_fn,
                    de_vocab_fname=config["de_vocab_fname"],
                    de_vocab_num_words=config["de_vocab_num_words"],
                    en_vocab_fname=config["en_vocab_fname"],
                    en_vocab_num_words=config["en_vocab_num_words"])
    else:
        # Create estimator
        session_config = tf.ConfigProto(log_device_placement=config["log_device_placement"])
        estimator_config = tf.estimator.RunConfig(
            session_config=session_config,
            save_checkpoints_secs=60 * config["save_checkpoints_mins"],
            keep_checkpoint_max=config["keep_checkpoint_max"])
        estimator = tf.estimator.Estimator(model_fn, model_dir=model_dir,
                                           params=config, config=estimator_config)

        if args.mode == "train":
            # Train estimator
            hooks = [tf_debug.LocalCLIDebugHook()] if args.debug else None
            estimator.train(input_fn, hooks=hooks, steps=None)
        else:
            # Concurrent translator (avoids having to reload TF checkpoint after each generated token)
            translator = Translator(estimator)

            # Build inverse vocabulary (from integer IDs to tokens)
            with open(config["en_vocab_fname"], "r") as vocab_file:
                inv_vocab = [token.strip() for token in vocab_file] + ["<UNK>"]

            while True:
                # Obtain user input
                print("Type some German sentence and hit ENTER (CTRL-C to quit)")
                german_input = input()

                # Create dataset containing this input
                start_dict = create_interactive_data(user_input=german_input)

                # Get translation
                if args.prediction_scheme == "beam":
                    translator.beam_predict(start_dict=start_dict, inv_vocab=inv_vocab,
                                            beam_size=config["beam_size"])
                elif args.prediction_scheme == "greedy":
                    translator.greedy_predict(start_dict=start_dict, inv_vocab=inv_vocab)
                else:
                    # One-time lookup: seed with the start token and show the
                    # 20 most likely next tokens.
                    # `dtype=object` replaces the removed `np.object` alias
                    # (identical behavior, NumPy >= 1.20 compatible).
                    start_dict.update({
                        "en_text": np.array(["<<START>>"], dtype=object),
                        "en_text_length": 1
                    })
                    prediction = translator.predict(past=start_dict)
                    ix = np.argsort(-prediction)[:20]
                    tokens = [inv_vocab[i] for i in ix]
                    print(tokens)
class BaseState(object):
    """Base class for bot conversation states.

    Holds shared class-level service singletons (translator, database, NLU,
    strings), wraps the per-state `entry()`/`process()` hooks with language
    setup, error shielding, DB commits and deferred promise/task execution,
    and queues outbound messages for batched sending.
    """

    # Headers for outbound HTTP requests (JSON payloads).
    HEADERS = {
        "Content-Type": "application/json"
    }
    # This variable allows to ignore `entry()` when needed
    has_entry = True
    # @Important: instantiate important classes
    # NOTE(review): these are class attributes, i.e. singletons shared by
    # every state instance.
    tr = Translator()
    db = Database()
    nlu = NLUWorker(tr)
    STRINGS = Strings(tr, db)
    files = FILENAMES
    # Media path and folder (created at class-definition time if missing).
    media_folder = "media"
    media_path = os.path.join(ROOT_PATH, media_folder)
    if not os.path.exists(media_path):
        os.mkdir(media_path)

    # Prepare state
    def __init__(self):
        # Keeps list of tasks
        self.tasks = list()
        # Keeps execution queue
        self.execution_queue = list()
        # Create language variable
        self.__language = None
        self.strings = None

    def set_language(self, value: str):
        """
        This method sets language to a current state
        If language is None - base language version is english
        Args:
            value (str): language code of the user's country
        """
        self.__language = value or "en"
        self.strings = StringAccessor(self.__language, self.STRINGS)

    async def wrapped_entry(self, context: Context, user: User):
        """Run `entry()` with language setup, exception shielding, optional
        DB commit, promise fulfilment and queued task execution.

        Returns the status object produced by `entry()` (or OK(commit=False)
        if it raised)."""
        # Set language
        self.set_language(user['language'])
        # Wrap base method to avoid breaking server
        try:
            # Execute state method
            status = await self.entry(context, user, self.db)
        except Exception as e:
            # Do not commit to database if something went wrong
            status = OK(commit=False)
            # Log exception
            logging.exception(e)
        # Commit changes to database
        if status.commit:
            await self.db.commit_user(user=user)
        # @Important: Fulfill text promises
        if self.strings.promises:
            await self.strings.fill_promises()
        # @Important: Since we call this always, check if
        # @Important: the call is actually needed
        if self.tasks:
            # @Important: collect all requests
            _results = await self.collect()
        # @Important: Execute all queued jobs
        if self.execution_queue:
            await self.execute_tasks()
        return status

    async def wrapped_process(self, context: Context, user: User):
        """Same wrapper as `wrapped_entry`, but around `process()`."""
        # Set language
        self.set_language(user['language'])
        # Wrap base method to avoid breaking server
        try:
            # Execute state method
            status = await self.process(context, user, self.db)
        except Exception as e:
            # Do not commit to database if something went wrong
            status = OK(commit=False)
            # Log exception
            logging.exception(e)
        # Commit changes to database
        if status.commit:
            await self.db.commit_user(user=user)
        # @Important: Fulfill text promises
        if self.strings.promises:
            await self.strings.fill_promises()
        # @Important: Since we call this always, check if
        # @Important: the call is actually needed
        if self.tasks:
            # @Important: collect all requests
            await self.collect()
        # @Important: Execute all queued jobs
        if self.execution_queue:
            await self.execute_tasks()
        return status

    # Actual state method to be written for the state
    async def entry(self, context: Context, user: User, db):
        return OK

    # Actual state method to be written for the state
    async def process(self, context: Context, user: User, db):
        return OK

    def parse_button(self, raw_text: str, truncated=False) -> Button:
        """Map a user-visible button caption back to its string key for the
        current language.

        With `truncated=True`, captions longer than 20 characters are
        matched by their first 20 characters (the caption may have been
        truncated by the messaging platform). Returns a Button; its key is
        set only when a match is found."""
        btn = Button(raw_text)
        lang_obj = self.STRINGS.cache.get(self.__language)
        if lang_obj is not None:
            if not truncated:
                for key, value in lang_obj.items():
                    if value == raw_text:
                        btn.set_key(key)
                        break
            else:
                for key, value in lang_obj.items():
                    if len(value) > 20 and value[:20] == raw_text[:20]:
                        btn.set_key(key)
                        break
                    elif value == raw_text:
                        btn.set_key(key)
                        break
        return btn

    # @Important: 1) find better way with database
    # @Important: 2) What if we do it in non blocking asyncio.create_task (?)
    # @Important: But on the other hand, we can't relay on the file status
    # @Important: for example if next call needs to upload it somewhere
    # @Important: If you deal with reliability and consistency - great optimisation
    async def download_by_url(self, url, *folders, filename):
        """Stream `url` into media_path/<folders>/<filename> and return the
        full file path. Creates the target folder if it does not exist."""
        # Make sure file exists
        if not self.exists(*folders):
            # Create folder on the path
            os.mkdir(os.path.join(self.media_path, *folders))
        # Full file path with filename
        filepath = os.path.join(self.media_path, *folders, filename)
        # Initiate aiohttp sessions, get file
        async with ClientSession() as session:
            async with session.get(url) as response:
                # Open file with aiofiles and start steaming bytes, write to the file
                logging.debug(f"Downloading file: {url} to {filepath}")
                async with aiofiles.open(filepath, 'wb') as f:
                    async for chunk in response.content.iter_any():
                        await f.write(chunk)
                logging.debug(f"Finished download [{filepath}]")
        return filepath

    # @Important: check if downloaded file exist
    def exists(self, *args):
        """Return True if the path media_path/<args...> exists."""
        return os.path.exists(os.path.join(self.media_path, *args))

    # @Important: high level access to translation module
    # @Important: note, though, that we shouldn't abuse translation api
    # @Important: because a) it's not good enough, b) it takes time to make
    # @Important: a call to the google cloud api
    async def translate(self, text: str, target: str) -> str:
        return await self.tr.translate_text(text, target)

    # Sugar
    # @Important: command to actually send all collected requests from `process` or `entry`
    async def collect(self):
        """Send every queued SenderTask sequentially and return the list of
        per-task results."""
        results = list()
        async with ClientSession(json_serialize=lambda o: json.dumps(o, cls=PromisesEncoder)) as session:
            # @Important: Since asyncio.gather order is not preserved, we don't want to run them concurrently
            # tasks = [self._send(task, session) for task in self.tasks[id(context)]]
            # group = asyncio.gather(*tasks)
            # results = await group
            # return results
            for each_task in self.tasks:
                res = await self._send(each_task, session)
                results.append(res)
        return results

    # @Important: Real send method, takes SenderTask as argument
    async def _send(self, task: SenderTask, session: ClientSession):
        """POST one task's context to its target instance URL; logs failures.

        NOTE(review): currently always returns None (the success branch is
        commented out), so `collect()` accumulates Nones."""
        # Takes instance data holder object with the name from the tokens storage, extracts url
        url = tokens[task.user['via_instance']].url
        # Unpack context, set headers (content-type: json)
        async with session.post(url, json=task.context,
                                headers=self.HEADERS) as resp:
            # If reached server - log response
            if resp.status == 200:
                pass
                # [DEBUG]
                #result = await resp.json()
                #if result:
                #    logging.info(f"Sending task status: {result}")
                #    return result
                #else:
                #    logging.info(f"Sending task status: No result")
            # Otherwise - log error
            else:
                logging.error(f"[ERROR]: Sending task (user={task.user}, context={task.context}) status {await resp.text()}")

    # @Important: `send` METHOD THAT ALLOWS TO SEND PAYLOAD TO THE USER
    def send(self, to_user: User, context: Context):
        """Queue a deep copy of `context`'s request for `to_user`; actual
        delivery happens later in `collect()`."""
        # @Important: [Explanation to the code below]:
        # @Important: maybe add some queue of coroutines and dispatch them all when handler return status (?)
        # @Important: or just dispatch them via asyncio.create_task so it will be more efficient (?)
        # @Important: reasoning:
        # @Important: simple way: server -> request1 -> status1 -> request2 -> status2 -> request3 -> status3
        # @Important: this way: server -> gather(request1, request2, request3) -> log(status1, status2, status3)
        # @Important: The easy way to add files from files.json
        if isinstance(context['request']['message']['text'], TextPromise):
            # Find according key for files from TextPromise
            files = self.files.get(context['request']['message']['text'].key, list())
            #logging.info(files)
            context['request']['file'] = [{"payload": _file} for _file in files]
            context['request']['has_file'] = bool(files)
            context['request']['has_image'] = bool(files)
            # [DEBUG]
            # logging.info(context['request']['message']['text'].key)
        else:
            # [DEBUG]
            pass
            # logging.info(context['request']['message']['text'])
        # Deep-copy so later mutations of the context don't affect the
        # queued payload.
        self.tasks.append(SenderTask(to_user, copy.deepcopy(context.__dict__['request'])))

    async def execute_tasks(self):
        """Run every queued ExecutionTask concurrently; return their results."""
        results = await asyncio.gather(
            *[exec_task.func(*exec_task.args, **exec_task.kwargs)
              for exec_task in self.execution_queue]
        )
        return results

    def create_task(self, func, *args, **kwargs):
        """Queue `func(*args, **kwargs)` for later execution by `execute_tasks`."""
        self.execution_queue.append(ExecutionTask(func, args, kwargs))