def __parse_sentence(sent, doc_offset, tokenizer):
    """
    Tokenize ``sent`` and wrap the result in a ``models.Sentence``.

    Every item produced by ``tokenizer.parse(sent)`` is whitespace-stripped,
    located in ``sent`` by a left-to-right substring search and stored as a
    ``models.Token`` holding its sentence-relative offsets, its
    document-relative offsets and the item's ``tag_`` under the ``'POS'``
    metadata key.

    :param string sent: text of the sentence
    :param tuple doc_offset: (start, end); only ``doc_offset[0]`` is used to
        shift token offsets, the tuple itself is stored on the Sentence
    :param tokenizer: object whose ``parse(sent)`` yields items exposing
        ``string`` and ``tag_`` attributes
    :return: Sentence object with its ``tokens`` list populated
    """
    sentence = models.Sentence(content=sent, doc_offset=doc_offset)
    sentence.tokens = []

    cursor = 0
    for parsed in tokenizer.parse(sent):
        text = parsed.string.strip()
        # Search from the end of the previous token so repeated words map to
        # successive occurrences.
        # NOTE(review): str.find returns -1 when ``text`` is not present at or
        # after ``cursor``; the offsets (and the cursor) are then derived
        # from -1. Confirm whether the tokenizer can ever alter token text.
        begin = sent.find(text, cursor)
        finish = begin + len(text)
        token = models.Token(
            content=text,
            sent_offset=(begin, finish),
            doc_offset=(doc_offset[0] + begin, doc_offset[0] + finish),
            metadata={'POS': parsed.tag_},
        )
        cursor = finish
        sentence.tokens.append(token)

    return sentence
def parse_sentence(sent, doc_offset, tokenizer):
    """
    Tokenize ``sent`` and wrap the result in a ``models.Sentence``.

    The strings returned by ``tokenizer.tokenize(sent)`` are located in
    ``sent`` one after another with a left-to-right substring search; each
    becomes a ``models.Token`` with sentence-relative and document-relative
    offsets. No metadata is attached to the tokens.

    :param string sent: text of the sentence
    :param tuple doc_offset: (start, end); only ``doc_offset[0]`` is used to
        shift token offsets, the tuple itself is stored on the Sentence
    :param tokenizer: object whose ``tokenize(sent)`` yields token strings
    :return: models.Sentence object with its ``tokens`` list populated
    """
    sentence = models.Sentence(content=sent, doc_offset=doc_offset)
    sentence.tokens = []

    cursor = 0
    for word in tokenizer.tokenize(sent):
        # Resume the search where the previous token ended so duplicate
        # words resolve to successive occurrences.
        # NOTE(review): str.find returns -1 if the tokenizer emitted text
        # that is not literally present at or after ``cursor``; offsets and
        # the cursor are then derived from -1. Confirm the tokenizer never
        # rewrites characters.
        begin = sent.find(word, cursor)
        finish = begin + len(word)
        token = models.Token(
            content=word,
            sent_offset=(begin, finish),
            doc_offset=(doc_offset[0] + begin, doc_offset[0] + finish),
        )
        cursor = finish
        sentence.tokens.append(token)

    return sentence
def check_if_user_edited(user, sentenceID):
    '''
    Filter telling whether ``user`` is recorded as the editor of a sentence.

    The sentence is loaded by its id and the ``_id`` of the user looked up by
    username is compared with the ``'userID'`` entry of the sentence state.

    :param string user: the user's username
    :param string sentenceID: the sentence's id
    :returns: True when the user's ``_id`` equals ``state['userID']``
    '''
    sentence = models.Sentence(oid=sentenceID)
    user_id = models.User(username=user)._id
    return user_id == sentence.state['userID']
def check_if_user_approved(user, sentenceID):
    '''
    Filter telling whether ``user`` is among the approvers of a sentence.

    The sentence is loaded by its id and the ``_id`` of the user looked up by
    username is tested for membership in the ``'approvers'`` entry of the
    sentence state.

    :param string user: the user's username
    :param string sentenceID: the sentence's id
    :returns: True when the user's ``_id`` is in ``state['approvers']``
    '''
    sentence = models.Sentence(oid=sentenceID)
    user_id = models.User(username=user)._id
    return user_id in sentence.state['approvers']
def check_if_user_edited(user, sentenceID):
    ''' Filter reporting whether the given user edited the sentence.

    :Parameters:
        - 'user': username used to look up the user
        - 'sentenceID': id used to load the sentence
    :Returns:
        - True when the looked-up user's ``_id`` equals the ``'userID'``
          entry of the sentence state, False otherwise
    '''
    sentence = models.Sentence(oid=sentenceID)
    editor_id = models.User(username=user)._id
    recorded_id = sentence.state['userID']
    return editor_id == recorded_id
def unapprove_translation():
    '''
    This function is called when a user posts an unapproval.

    It reads the request JSON (passed through ``fix_json``), loads the
    sentence named by ``old._id`` and the user named by ``new.unapprover``,
    and asks the sentence to withdraw that user's approval.

    :returns: a ``(json_body, http_status)`` tuple. On success the body is
        ``{"code": 200, "msg": "Unapproval Succeeded"}`` with status 200; on
        ``models.MyError`` or ``models.LockError`` the body carries the
        exception's ``code`` and ``msg`` (plus lock details for
        ``LockError``) and the status is the exception's ``code``.
    '''
    # Parsing and lookups happen outside the try block, so a missing key or
    # a failed lookup propagates to the caller instead of being reported as
    # JSON.
    j = fix_json(request.json)
    t = models.Sentence(oid=j[u'old'][u'_id'])
    unapprover = models.User(username=j[u'new'][u'unapprover'])
    try:
        t.unapprove(unapprover)
        # The key was previously spelled "code:" (stray colon), so clients
        # reading "code" - as every error response provides - found nothing.
        return json.dumps({"code": 200,
                           "msg": "Unapproval Succeeded"}), 200
    except models.MyError as e:
        return json.dumps({"code": e.code,
                           "msg": e.msg}), e.code
    # NOTE(review): if LockError derives from MyError this handler can never
    # run because the clause above catches it first - confirm the hierarchy.
    except models.LockError as e:
        return json.dumps({"code": e.code,
                           "msg": e.msg,
                           "file_path": e.file_path,
                           "username": e.username,
                           "target_language": e.target_language}), e.code
def edit_translation():
    '''
    Handle a posted edit of a translated sentence.

    The request JSON (passed through ``fix_json``) names the sentence in
    ``old._id``, the editing user in ``new.editor`` and the replacement text
    in ``new.new_target_sentence``; the edit is applied through
    ``Sentence.edit``.

    :returns: a ``(json_body, http_status)`` tuple describing success or the
        failure that occurred
    '''
    try:
        payload = fix_json(request.json)
        sentence = models.Sentence(oid=payload[u'old'][u'_id'])
        submitted = payload[u'new']
        editor = models.User(username=submitted[u'editor'])
        sentence.edit(editor, submitted[u'new_target_sentence'])
    except KeyError:
        # NOTE(review): the body reports 401 while the HTTP status is 500;
        # kept as-is, confirm which one is intended.
        body, status = {"code": 401, "msg": "Edit Failed"}, 500
    except models.MyError as err:
        body, status = {"code": err.code, "msg": err.msg}, err.code
    except models.LockError as err:
        body, status = {"code": err.code,
                        "msg": err.msg,
                        "file_path": err.file_path,
                        "username": err.username,
                        "target_language": err.target_language}, err.code
    else:
        body, status = {"code": 200, "msg": "Edit Succeeded"}, 200
    return json.dumps(body), status
def write_mongo(po_fn, userID, status, language, po_root, db):
    '''write a po_file to mongodb

    A ``models.File`` is created for the po file (path relative to
    ``po_root``, extension removed) and one ``models.Sentence`` is saved for
    every translated entry, numbered in iteration order starting at 0.

    :Parameters:
        - 'po_fn': the file name of the current pofile
        - 'userID': the ID of the user that translated the po file
        - 'status': the status of the translations
        - 'language': The target_language of the translations (source assumed to be english)
        - 'po_root': the root of the po_files
        - 'db': passed as ``curr_db`` to every model created here
    '''
    po = polib.pofile(po_fn)
    rel_fn = os.path.splitext(os.path.relpath(po_fn, po_root))[0]
    f = models.File(
        {
            u'file_path': rel_fn,
            u'priority': 0,
            u'source_language': u'en',
            u'target_language': language,
        },
        curr_db=db)

    # Matches a string that is a single :name:`text` construct whose
    # backticked part has no "<...>" pair in it.
    reg = re.compile('^:[a-zA-Z0-9]+:`(?!.*<.*>.*)[^`]*`$')

    for i, entry in enumerate(po.translated_entries()):
        # Encode once; the value is used for matching and for storage.
        target = entry.msgstr.encode('utf-8')

        sentence_status = status
        match = reg.match(target)
        # "$" also matches just before a trailing newline, so the equality
        # check is what guarantees the whole string was matched.
        if match is not None and match.group() == target:
            sentence_status = "approved"

        t = models.Sentence(
            {
                u'source_language': u'en',
                u'source_sentence': entry.msgid.encode('utf-8'),
                u'sentenceID': entry.tcomment.encode('utf-8'),
                u'sentence_num': i,
                u'fileID': f._id,
                u'target_sentence': target,
                u'target_language': language,
                u'userID': userID,
                u'status': sentence_status,
                u'update_number': 0,
            },
            curr_db=db)
        t.save()

    # Return value is ignored; presumably this refreshes the file's sentence
    # count after the inserts - confirm against models.File.
    f.get_num_sentences()
def sentence(self, id=None):
    '''Build a ``models.Sentence`` bound to this object's database.

    Thin wrapper around the sentence constructor that supplies ``self.db``
    as ``curr_db``.

    :param id: forwarded to the constructor as ``oid`` (default None)
    :returns: the newly constructed ``models.Sentence``
    '''
    return models.Sentence(oid=id, curr_db=self.db)