def translate_from_scratch(request, source, target, title, aid, template_name="wt_articles/translate_form.html"):
    """
    Loads a source article by provided article id (aid) and generates
    formsets to contain each sentence in the requested translation.

    Behaviour by request type:

    * No SourceArticle with id ``aid``: the template is rendered with
      ``no_match`` set to True.
    * Non-POST request: an unbound formset is built with one form per
      source sentence of the article.
    * Valid POST: one TranslatedSentence is created per submitted form,
      all of them are attached to a new TranslatedArticle in language
      ``target``, and the response redirects to that article's
      absolute URL.
    * Invalid POST: the bound formset (with its errors) is rendered again.

    ``source`` and ``title`` are accepted but not used by this view.
    """
    sa_set = SourceArticle.objects.filter(id=aid)
    if not sa_set:
        return render_to_response(template_name,
                                  {"no_match": True},
                                  context_instance=RequestContext(request))
    article = sa_set[0]
    ss_list = article.sourcesentence_set.all()
    TranslatedSentenceSet = formset_factory(TranslatedSentenceMappingForm, extra=0)

    if request.method == "POST":
        formset = TranslatedSentenceSet(request.POST, request.FILES)
        if formset.is_valid():
            ts_list = []
            # Start from the requested article so that a valid formset
            # containing no forms still yields a translated article with
            # an article and title, instead of failing on an unbound name.
            source_article = article
            for form in formset.forms:
                ss = form.cleaned_data['source_sentence']
                text = form.cleaned_data['text']
                ### TODO figure something better out (for ``best``)
                ts = TranslatedSentence(segment_id=ss.segment_id,
                                        source_sentence=ss,
                                        text=text,
                                        translated_by=request.user.username,
                                        translation_date=datetime.now(),
                                        language=target,
                                        best=True,
                                        end_of_paragraph=ss.end_of_paragraph)
                ts_list.append(ts)
                # As before, the article of the last submitted sentence wins.
                source_article = ss.article

            ta = TranslatedArticle()
            ta.article = source_article
            ta.title = source_article.title
            ta.timestamp = datetime.now()
            ta.language = target
            # The article is saved before its sentences are attached and
            # saved once more afterwards, in the same order as before.
            ta.save()
            for ts in ts_list:
                ts.save()
            ta.sentences = ts_list
            ta.save()
            return HttpResponseRedirect(ta.get_absolute_url())
    else:
        initial_ss_set = [{'source_sentence': s} for s in ss_list]
        formset = TranslatedSentenceSet(initial=initial_ss_set)

    # Label every text field with the source sentence it translates.
    for form, s in zip(formset.forms, ss_list):
        form.fields['text'].label = s.text
    return render_to_response(template_name, {
        "formset": formset,
        "title": article.title,
    }, context_instance=RequestContext(request))
def translate_from_scratch(request, source, target, title, aid, template_name="wt_articles/translate_form.html"):
    """
    Loads a source article by provided article id (aid) and generates
    formsets to contain each sentence in the requested translation.

    If no SourceArticle has id ``aid`` the template is rendered with
    ``no_match`` set to True.  A valid POST stores one TranslatedSentence
    per submitted form, attaches them to a new TranslatedArticle in
    language ``target`` and redirects to that article's absolute URL.
    Any other request (non-POST, or a POST that fails validation) renders
    the formset, each text field labelled with its source sentence.

    ``source`` and ``title`` are accepted but not used by this view.
    """
    sa_set = SourceArticle.objects.filter(id=aid)
    if not sa_set:
        return render_to_response(template_name,
                                  {"no_match": True},
                                  context_instance=RequestContext(request))
    article = sa_set[0]
    ss_list = article.sourcesentence_set.all()
    TranslatedSentenceSet = formset_factory(TranslatedSentenceMappingForm, extra=0)

    if request.method == "POST":
        formset = TranslatedSentenceSet(request.POST, request.FILES)
        if formset.is_valid():
            ts_list = []
            # Fall back to the requested article: a valid formset with no
            # forms would otherwise leave the source article unbound.
            src_article = article
            for form in formset.forms:
                src_sent = form.cleaned_data['source_sentence']
                text = form.cleaned_data['text']
                trans_sent = TranslatedSentence(
                    segment_id=src_sent.segment_id,
                    source_sentence=src_sent,
                    text=text,
                    translated_by=request.user.username,
                    translation_date=datetime.now(),
                    language=target,
                    best=True,
                    end_of_paragraph=src_sent.end_of_paragraph)
                ts_list.append(trans_sent)
                # The last submitted sentence determines the article.
                src_article = src_sent.article

            trans_art = TranslatedArticle()
            trans_art.article = src_article
            trans_art.title = src_article.title
            trans_art.timestamp = datetime.now()
            trans_art.language = target
            # Save first, attach the saved sentences, then save again,
            # keeping the original persistence order.
            trans_art.save()
            for trans_sent in ts_list:
                trans_sent.save()
            trans_art.sentences = ts_list
            trans_art.save()
            return HttpResponseRedirect(trans_art.get_absolute_url())
    else:
        initial_ss_set = [{'source_sentence': s} for s in ss_list]
        formset = TranslatedSentenceSet(initial=initial_ss_set)

    # Show the source sentence as the label of its translation field.
    for form, sent in zip(formset.forms, ss_list):
        form.fields['text'].label = sent.text
    return render_to_response(template_name, {
        "formset": formset,
        "title": article.title,
    }, context_instance=RequestContext(request))
def translate_from_scratch(request, source, target, title, aid, template_name="wt_articles/translate_form.html"):
    """
    aid in this context is the source article id

    Renders a formset with one translation form per source sentence of
    the article.  When the formset is POSTed and validates, a
    TranslatedSentence is stored for every form, the sentences are
    attached to a new TranslatedArticle in language ``target`` and the
    user is redirected to that article.  When no SourceArticle matches
    ``aid`` the template is rendered with ``no_match`` set to True.

    ``source`` and ``title`` are accepted but not used by this view.
    """
    sa_set = SourceArticle.objects.filter(id=aid)
    if not sa_set:
        return render_to_response(template_name,
                                  {"no_match": True},
                                  context_instance=RequestContext(request))
    article = sa_set[0]
    ss_list = article.sourcesentence_set.all()
    TranslatedSentenceSet = formset_factory(TranslatedSentenceMappingForm, extra=0)

    if request.method == "POST":
        formset = TranslatedSentenceSet(request.POST, request.FILES)
        if formset.is_valid():
            ts_list = []
            # Defaults to the requested article so that a valid formset
            # without any forms does not fail on an unbound sentence.
            translated_from = article
            for form in formset.forms:
                ss = form.cleaned_data['source_sentence']
                text = form.cleaned_data['text']
                ### TODO figure something better out (for ``best``)
                ts = TranslatedSentence(segment_id=ss.segment_id,
                                        source_sentence=ss,
                                        text=text,
                                        translated_by=request.user.username,
                                        translation_date=datetime.now(),
                                        language=target,
                                        best=True,
                                        end_of_paragraph=ss.end_of_paragraph)
                ts_list.append(ts)
                # Same as before: the last sentence's article is used.
                translated_from = ss.article

            ta = TranslatedArticle()
            ta.article = translated_from
            ta.title = translated_from.title
            ta.timestamp = datetime.now()
            ta.language = target
            # Saved once before and once after the sentences are attached,
            # in the original order.
            ta.save()
            for ts in ts_list:
                ts.save()
            ta.sentences = ts_list
            ta.save()
            return HttpResponseRedirect(ta.get_absolute_url())
    else:
        initial_ss_set = [{'source_sentence': s} for s in ss_list]
        formset = TranslatedSentenceSet(initial=initial_ss_set)

    # Each translation field is labelled with the sentence it translates.
    for form, s in zip(formset.forms, ss_list):
        form.fields['text'].label = s.text
    return render_to_response(template_name, {
        "formset": formset,
        "title": article.title,
    }, context_instance=RequestContext(request))
def handle_noargs(self, **options):
    """
    Machine-translate every TranslationRequest assigned to APERTIUM.

    For each request the article title and every source sentence are
    translated into the request's target language.  The results are
    stored as a TranslatedArticle with its TranslatedSentence rows, and
    the request is deleted once everything has been saved.

    If saving fails, the exception type and arguments are printed, the
    rows already written for that request are deleted, and the exception
    is re-raised, which aborts the remaining requests.
    """
    translator = apertium_translator()
    reqs = TranslationRequest.objects.filter(translator=APERTIUM)
    for req in reqs:
        target_language = req.target_language
        translated_title = translator.translate(req.article.title,
                                                source=req.article.language,
                                                target=target_language)

        # A fresh list per request: sharing one list across requests
        # attached the sentences of earlier articles to later ones.
        ta_sentences = []
        for s in req.article.sourcesentence_set.all():
            translated = translator.translate(s.text,
                                              source=s.article.language,
                                              target=target_language)
            ta_sentences.append(
                TranslatedSentence(segment_id=s.segment_id,
                                   source_sentence=s,
                                   text=translated,
                                   translated_by=translator.name,
                                   translation_date=datetime.now(),
                                   language=target_language,
                                   best=True,
                                   end_of_paragraph=s.end_of_paragraph))

        ta = TranslatedArticle()
        ta.article = req.article
        ta.title = translated_title
        ta.timestamp = datetime.now()
        ta.language = target_language

        try:
            # The article is saved before the sentences are attached and
            # once more afterwards.
            ta.save()
            for ts in ta_sentences:
                ts.save()
            ta.sentences = ta_sentences
            ta.save()
            # Delete only the request that was just fulfilled; earlier
            # requests were already deleted in their own iteration.
            req.delete()
        except Exception as e:
            print(type(e))
            print(e.args)
            # Roll back whatever was written for this request.  Objects
            # that never reached the database have no primary key, and
            # deleting those would presumably raise and mask the
            # original error, so they are skipped.
            if ta.pk is not None:
                ta.delete()
            for ts in ta_sentences:
                if ts.pk is not None:
                    ts.delete()
            raise
def parse_result_file(self, result_file, source_lang, target_lang):
    """
    Import a results CSV file into articles and sentences.

    The first row holds the column headers; columns are looked up by
    name, so their order does not matter.  Each following row carries up
    to ten segments in the columns ``Input.seg_id1..10``,
    ``Input.seg1..10`` and ``Answer.translation1..10``.  A segment id
    has the form ``<article id>_<segment id>``; the first cell in a row
    that does not split into exactly two parts ends that row.

    For every segment the matching SourceArticle, TranslatedArticle,
    SourceSentence and TranslatedSentence are updated if they already
    exist and created otherwise.

    result_file -- path of the CSV file to read
    source_lang -- language assigned to the source articles
    target_lang -- language assigned to the translated articles/sentences
    """
    months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')

    def parse_submit_time(date_string):
        # The value is split on single spaces: field 1 is the month
        # abbreviation, field 2 the day, field 3 HH:MM:SS and field 5
        # the year.  Fields 0 and 4 are ignored.
        df = date_string.split(' ')
        tf = df[3].split(':')
        return datetime(int(df[5]), months.index(df[1]) + 1, int(df[2]),
                        int(tf[0]), int(tf[1]), int(tf[2]))

    sa = None
    ta = None
    cur_aid = -1

    # The reader may be lazy, so all rows are consumed while the file is
    # still open; the ``with`` block closes it even if a row fails.
    with open(result_file, 'r') as f:
        csv_reader = unicode_csv_reader(f)
        headers = next(csv_reader)
        # not assuming a specific order for the fields
        header_map = dict((h, i) for i, h in enumerate(headers))

        segment_ids = [header_map['Input.seg_id%d' % n] for n in range(1, 11)]
        segments = [header_map['Input.seg%d' % n] for n in range(1, 11)]
        translations = [header_map['Answer.translation%d' % n]
                        for n in range(1, 11)]
        has_title = 'Input.article' in header_map

        for line in csv_reader:
            if has_title:
                title = line[header_map['Input.article']] + ' (translated)'
            else:
                title = 'Noname (translated)'
            approved = (line[header_map['AssignmentStatus']] == 'Approved')

            for i in range(10):
                try:
                    (aid, seg_id) = line[segment_ids[i]].split('_')
                except ValueError:
                    # treating this basically like an eof
                    break

                if cur_aid != int(aid):
                    if sa:
                        # save the previous SourceArticle
                        sa.save(manually_splitting=True)
                    if not has_title:
                        # NOTE(review): title is modified in place, so a
                        # second article in the same row gets both ids
                        # prefixed -- confirm whether that is intended.
                        title = aid + ' ' + title

                    # check if the document is already imported
                    try:
                        sa = SourceArticle.objects.filter(
                            language=source_lang).get(doc_id=aid)
                    except SourceArticle.DoesNotExist:
                        # make a new sa object
                        sa = SourceArticle()
                    sa.sentences_processed = True
                    cur_aid = int(aid)
                    sa.language = source_lang
                    sa.doc_id = aid
                    sa.timestamp = datetime.now()
                    sa.title = title
                    # get an id for the SourceArticle instance
                    sa.save(manually_splitting=True)

                    if ta:
                        # save the previous target article
                        ta.save()
                    # check if the target article has been translated
                    # and imported
                    try:
                        ta = TranslatedArticle.objects.filter(
                            article=sa).get(language=target_lang)
                    except TranslatedArticle.DoesNotExist:
                        # make a new TranslatedArticle object
                        ta = TranslatedArticle()
                        ta.article = sa
                    # if there is one, do not touch unknown fields.
                    ta.title = title
                    ta.timestamp = datetime.now()
                    ta.language = target_lang
                    ta.approved = approved
                    ta.save()

                end_of_paragraph = True
                # NOTE(review): segment columns are numbered 1..10 but
                # the tag column uses the 0-based loop index -- confirm
                # against the CSV layout.
                tag_id = 'Input.tag%d' % i
                if tag_id in header_map:
                    tag = line[header_map[tag_id]]
                    # Store a real boolean rather than a match object.
                    end_of_paragraph = "LastSentence" in tag

                seg = line[segments[i]]
                try:
                    ss = sa.sourcesentence_set.get(segment_id=seg_id)
                    new_source_sentence = False
                except SourceSentence.DoesNotExist:
                    ss = SourceSentence()
                    ss.article = sa
                    new_source_sentence = True
                ss.text = seg
                ss.segment_id = seg_id
                ss.end_of_paragraph = end_of_paragraph
                ss.save()
                if new_source_sentence:
                    # Only new sentences are appended to the article text.
                    sa.source_text += seg + u'\n'

                translation = line[translations[i]]
                try:
                    ts = ta.sentences.get(segment_id=seg_id)
                    new_translation = False
                except TranslatedSentence.DoesNotExist:
                    ts = TranslatedSentence()
                    ts.segment_id = seg_id
                    new_translation = True
                ts.source_sentence = ss
                ts.text = translation
                ts.translated_by = line[header_map['WorkerId']]
                ts.language = target_lang
                ts.translation_date = parse_submit_time(
                    line[header_map['SubmitTime']])
                ts.approved = approved
                ts.end_of_paragraph = ss.end_of_paragraph
                ts.save()
                if new_translation:
                    ta.sentences.add(ts)

    # Persist the last article pair touched by the loop.
    if sa:
        sa.save(manually_splitting=True)
    if ta:
        ta.save()
def parse_result_file(self, result_file, source_lang, target_lang):
    """
    Import a results CSV file into articles and sentences.

    The header row is mapped by column name, so column order is free.
    Every data row holds up to ten segments (``Input.seg_id1..10``,
    ``Input.seg1..10``, ``Answer.translation1..10``).  Segment ids look
    like ``<article id>_<segment id>``; a cell that does not split into
    exactly two parts stops processing of that row.

    Existing SourceArticle, TranslatedArticle, SourceSentence and
    TranslatedSentence rows are updated in place; missing ones are
    created.

    result_file -- path of the CSV file to read
    source_lang -- language assigned to the source articles
    target_lang -- language assigned to the translated articles/sentences
    """
    month_names = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")

    def submit_time_to_datetime(date_string):
        # Split on single spaces: field 1 is the month abbreviation,
        # field 2 the day, field 3 HH:MM:SS, field 5 the year.  Fields 0
        # and 4 are not used.
        df = date_string.split(" ")
        tf = df[3].split(":")
        return datetime(
            int(df[5]),
            month_names.index(df[1]) + 1,
            int(df[2]),
            int(tf[0]),
            int(tf[1]),
            int(tf[2]),
        )

    sa = None
    ta = None
    cur_aid = -1

    # Rows are consumed inside the ``with`` block because the reader may
    # be lazy; the file is closed even when a row raises.
    with open(result_file, "r") as f:
        csv_reader = unicode_csv_reader(f)
        headers = next(csv_reader)
        # not assuming a specific order for the fields
        header_map = dict((h, i) for i, h in enumerate(headers))

        segment_ids = [header_map["Input.seg_id%d" % n] for n in range(1, 11)]
        segments = [header_map["Input.seg%d" % n] for n in range(1, 11)]
        translations = [header_map["Answer.translation%d" % n] for n in range(1, 11)]
        has_title = "Input.article" in header_map

        for line in csv_reader:
            if has_title:
                title = line[header_map["Input.article"]] + " (translated)"
            else:
                title = "Noname (translated)"
            approved = line[header_map["AssignmentStatus"]] == "Approved"

            for i in range(10):
                try:
                    (aid, seg_id) = line[segment_ids[i]].split("_")
                except ValueError:
                    # treating this basically like an eof
                    break

                if cur_aid != int(aid):
                    if sa:
                        # save the previous SourceArticle
                        sa.save(manually_splitting=True)
                    if not has_title:
                        # NOTE(review): title is rewritten in place, so
                        # a second article in one row is prefixed with
                        # both ids -- confirm this is intended.
                        title = aid + " " + title

                    # check if the document is already imported
                    try:
                        sa = SourceArticle.objects.filter(language=source_lang).get(doc_id=aid)
                    except SourceArticle.DoesNotExist:
                        # make a new sa object
                        sa = SourceArticle()
                    sa.sentences_processed = True
                    cur_aid = int(aid)
                    sa.language = source_lang
                    sa.doc_id = aid
                    sa.timestamp = datetime.now()
                    sa.title = title
                    # get an id for the SourceArticle instance
                    sa.save(manually_splitting=True)

                    if ta:
                        # save the previous target article
                        ta.save()
                    # check if the target article has been translated
                    # and imported
                    try:
                        ta = TranslatedArticle.objects.filter(article=sa).get(language=target_lang)
                    except TranslatedArticle.DoesNotExist:
                        # make a new TranslatedArticle object
                        ta = TranslatedArticle()
                        ta.article = sa
                    # if there is one, do not touch unknown fields.
                    ta.title = title
                    ta.timestamp = datetime.now()
                    ta.language = target_lang
                    ta.approved = approved
                    ta.save()

                end_of_paragraph = True
                # NOTE(review): segment columns are 1-based while this
                # tag column uses the 0-based loop index -- confirm
                # against the CSV layout.
                tag_id = "Input.tag%d" % i
                if tag_id in header_map:
                    tag = line[header_map[tag_id]]
                    # Keep a plain boolean instead of a regex match object.
                    end_of_paragraph = "LastSentence" in tag

                seg = line[segments[i]]
                try:
                    ss = sa.sourcesentence_set.get(segment_id=seg_id)
                    created_source = False
                except SourceSentence.DoesNotExist:
                    ss = SourceSentence()
                    ss.article = sa
                    created_source = True
                ss.text = seg
                ss.segment_id = seg_id
                ss.end_of_paragraph = end_of_paragraph
                ss.save()
                if created_source:
                    # Article text only grows for newly created sentences.
                    sa.source_text += seg + u"\n"

                translation = line[translations[i]]
                try:
                    ts = ta.sentences.get(segment_id=seg_id)
                    created_translation = False
                except TranslatedSentence.DoesNotExist:
                    ts = TranslatedSentence()
                    ts.segment_id = seg_id
                    created_translation = True
                ts.source_sentence = ss
                ts.text = translation
                ts.translated_by = line[header_map["WorkerId"]]
                ts.language = target_lang
                ts.translation_date = submit_time_to_datetime(line[header_map["SubmitTime"]])
                ts.approved = approved
                ts.end_of_paragraph = ss.end_of_paragraph
                ts.save()
                if created_translation:
                    ta.sentences.add(ts)

    # Persist the last article pair touched by the loop.
    if sa:
        sa.save(manually_splitting=True)
    if ta:
        ta.save()