def fetch_sentences():
    """Return one page of sentences for a corpus, each with a translation.

    Query parameters read: ``basename``, ``pagesize``, ``status`` and
    ``pageno``. Sentences whose status is ``STATUS_PENDING`` are switched to
    ``STATUS_PROCESSING`` as a side effect. The response carries the list of
    sentence dicts (each extended with a ``translation`` key) and the total
    count returned by ``Sentence.limit``.
    """
    basename = request.args.get('basename')
    (sentencesobj, totalcount) = Sentence.limit(
        request.args.get('pagesize'), basename,
        request.args.get('status'), request.args.get('pageno'))
    sentences_list = []
    if sentencesobj is not None:
        # Serialise every sentence once and reuse the dict for the response.
        sent_dicts = []
        for sent in sentencesobj:
            sent_dict = json.loads(sent.to_json())
            if sent_dict['status'] == STATUS_PENDING:
                Sentence.objects(_id=sent_dict['_id']['$oid']).update(
                    set__status=STATUS_PROCESSING)
            sent_dicts.append(sent_dict)

        # Fall back to English when the corpus is unknown or has no target
        # language; an empty lookup result used to raise IndexError here.
        corpus_dict = json.loads(Corpus.objects(basename=basename).to_json())
        target_lang = 'en'
        if corpus_dict and corpus_dict[0].get('target_lang') is not None:
            target_lang = LANGUAGES[corpus_dict[0]['target_lang']]

        sources = [sent_dict['source'] for sent_dict in sent_dicts]
        translation_list = translatesinglesentence(sources, target_lang)
        for index, sent_dict in enumerate(sent_dicts):
            sent_dict['translation'] = translation_list[index]
            sentences_list.append(sent_dict)
    res = CustomResponse(Status.SUCCESS.value, sentences_list, totalcount)
    return res.getres()
def fetch_corpus():
    """Return every ``Corpus`` document in a success response.

    The caller named in the ``ad-userid`` header is logged; requests without
    that header are logged as anonymous.
    """
    initiator = request.headers.get('ad-userid')
    if initiator is None:
        log.info('fetch_corpus: initiated by anonymous user')
    else:
        log.info('fetch_corpus: initiated by ' + initiator)
    all_corpora = json.loads(Corpus.objects.to_json())
    return CustomResponse(Status.SUCCESS.value, all_corpora).getres()
def process_files_law(basename, name):
    """Align the extracted Hindi/English text of one upload and store the pairs.

    Expects ``<basename>_hin.txt`` and ``<basename>_eng.txt`` in the upload
    folder. The texts are filtered, processed, translated and aligned with
    ``helpers/bleualign.py``; each aligned pair is stored as a ``Sentence``
    under the corpus ``name``. All ``<basename>*`` files in the upload folder
    are deleted afterwards.

    :param basename: file-name prefix of the working files of this upload
    :param name: value stored in the ``basename`` field of every ``Sentence``
    :return: success response with the aligned lines and their OCR scores
    """
    import subprocess

    prefix = app.config['UPLOAD_FOLDER'] + '/' + basename
    filtertext(prefix + '_hin.txt', prefix + '_hin_filtered.txt')
    filtertext(prefix + '_eng.txt', prefix + '_eng_filtered.txt')
    processhindi(prefix + '_hin_filtered.txt')
    processenglish(prefix + '_eng_filtered.txt')
    translatewithgoogle(prefix + '_hin_filtered.txt', prefix + '_eng_tran.txt')

    # The aligner is addressed through the working directory, as before, but
    # with an argument list so no shell ever parses the file names.
    align_prefix = os.getcwd() + '/upload/' + basename
    subprocess.run([
        './helpers/bleualign.py',
        '-s', align_prefix + '_hin_filtered.txt',
        '-t', align_prefix + '_eng_filtered.txt',
        '--srctotarget', align_prefix + '_eng_tran.txt',
        '-o', align_prefix + '_output',
    ])

    def read_scored(path):
        # Collect the lines of `path` with their OCR average / per-word scores.
        lines, averages, word_scores = [], [], []
        with open(path, 'r') as handle:
            for line in handle:
                point = fetchwordsfromsentence(line, basename)
                lines.append(line)
                averages.append(point['avg'])
                word_scores.append(point['values'])
        return lines, averages, word_scores

    english_res, english_points, english_points_words = read_scored(prefix + '_output-t')
    hindi_res, hindi_points, hindi_points_words = read_scored(prefix + '_output-s')

    data = {'hindi': hindi_res, 'english': english_res,
            'english_scores': english_points, 'hindi_scores': hindi_points}

    sentences = []
    for i, hindi_line in enumerate(hindi_res):
        # English lines have the form "<text>:::::<alignment accuracy>".
        parts = english_res[i].split(':::::')
        sentences.append(Sentence(status=STATUS_PENDING,
                                  alignment_accuracy=parts[1],
                                  basename=name,
                                  source=hindi_line,
                                  target=parts[0],
                                  source_ocr_words=hindi_points_words[i],
                                  source_ocr=str(hindi_points[i]),
                                  target_ocr_words=english_points_words[i],
                                  target_ocr=str(english_points[i])))
    if sentences:
        # An empty bulk insert is rejected by the driver, so skip it.
        Sentence.objects.insert(sentences)

    for leftover in glob.glob(prefix + '*'):
        os.remove(leftover)
    res = CustomResponse(Status.SUCCESS.value, data)
    return res.getres()
def fetch_translation_process():
    """Return the translation processes created by the calling user.

    The user is taken from the ``ad-userid`` header and results are ordered
    by descending ``basename``. If the lookup fails, a ``Status.FAILURE``
    response is returned instead of crashing on an unset response object.
    """
    log.info('fetch_translation_process : started at ' + str(getcurrenttime()))
    try:
        processes_json = TranslationProcess.objects(
            created_by=request.headers.get('ad-userid')).order_by('-basename').to_json()
        res = CustomResponse(Status.SUCCESS.value, json.loads(processes_json))
    except Exception as e:
        log.info('fetch-translation-process : ERROR occured : ' + str(e))
        res = CustomResponse(Status.FAILURE.value, None)
    log.info('fetch_translation_process : ended at ' + str(getcurrenttime()))
    return res.getres()
def update_corpus():
    """Validate an update-corpus request and acknowledge it.

    The JSON body is passed to ``validate_upload_corpus_request``; nothing
    else is done with it here. Start and end times (milliseconds) are logged.
    """
    start_time = int(round(time.time() * 1000))
    log.info('update_corpus: started at ' + str(start_time))
    req_data = request.get_json()
    validate_upload_corpus_request(req_data)
    res = CustomResponse(Status.SUCCESS.value, 'no. of sentences are ')
    end_time = int(round(time.time() * 1000))
    # The end marker previously carried the 'upload_corpus' tag of the
    # sibling handler, which made the two indistinguishable in the logs.
    log.info('update_corpus: ended at ' + str(end_time) +
             ' total time elapsed = ' + str(end_time - start_time))
    return res.getres()
def remove_junk():
    """Delete every ``Sentence`` whose source equals a line of the uploaded file.

    The uploaded ``file`` is stored as ``<timestamp>_junk.txt`` in the upload
    folder and read line by line; each line (including its line ending) is
    matched against ``Sentence.source``. The stored file is left in place.
    """
    basename = str(int(time.time()))
    uploaded = request.files['file']
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], basename + '_junk.txt')
    uploaded.save(filepath)
    # Context manager so the handle is released even if a delete fails.
    with open(filepath, 'r') as junk_lines:
        for line in junk_lines:
            Sentence.objects(source=line).delete()
    res = CustomResponse(Status.SUCCESS.value, None)
    return res.getres()
def delete_process():
    """Delete the translation process named by the ``processname`` form field.

    Returns a success response carrying the basename, or a
    ``Status.FAILURE`` response when the field is missing or the delete
    fails.
    """
    log.info('delete_process: started at ' + str(getcurrenttime()))
    # Bound up-front so the error path can still report it when reading the
    # form field is what failed.
    basename = None
    try:
        basename = request.form.getlist('processname')[0]
        log.info('delte_process : requested basename is : ' + basename)
        TranslationProcess.objects(basename=basename).delete()
        log.info('delete_process: ended at ' + str(getcurrenttime()))
        res = CustomResponse(Status.SUCCESS.value, basename)
    except Exception:
        log.info('delte_process : ERROR while processing basename : ' + str(basename))
        res = CustomResponse(Status.FAILURE.value, basename)
    return res.getres()
def convert_to_pdf():
    """Convert the file named in the JSON body to PDF and return the PDF's name.

    The source is looked up under ``NGINX_FOLDER``; the converted file is
    copied to ``NGINX_FOLDER/<uuid>.pdf``. Conversion failures and timeouts
    are re-raised as ``InternalServerErrorError``.
    """
    body = request.get_json()
    upload_id = str(uuid4())
    filename = body['filename']
    # NOTE(review): `filename` comes straight from the request and is joined
    # into a filesystem path without validation - confirm this is intended.
    source_path = os.path.join(NGINX_FOLDER, filename)
    pdf_name = upload_id + '.pdf'
    try:
        converted = convert_to(os.path.join(NGINX_FOLDER, 'pdf', upload_id),
                               source_path, timeout=15)
        copyfile(converted, os.path.join(NGINX_FOLDER, pdf_name))
    except LibreOfficeError:
        raise InternalServerErrorError(
            {'message': 'Error when converting file to PDF'})
    except TimeoutExpired:
        raise InternalServerErrorError(
            {'message': 'Timeout when converting file to PDF'})
    return CustomResponse(Status.SUCCESS.value, pdf_name).getres()
def translate():
    """Extract the text of an uploaded Hindi PDF, translate it and store the pairs.

    A ``TranslationProcess`` record is created in ``STATUS_PROCESSING``, the
    PDF is converted and its text captured in a worker process, the text is
    filtered and translated, and every source/target line pair is stored as
    a ``Translation``. Working files are removed and the process is marked
    ``STATUS_PROCESSED``.

    :return: success response with the ``hindi`` and ``english`` line lists
    """
    basename = str(int(time.time()))
    current_time = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
    uploaded = request.files['file']
    prefix = app.config['UPLOAD_FOLDER'] + '/' + basename
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], basename + '.pdf')
    translationProcess = TranslationProcess(
        status=STATUS_PROCESSING, name=uploaded.filename,
        created_on=current_time, basename=basename)
    translationProcess.save()
    uploaded.save(filepath)

    # The pool is created only once the upload is on disk and is torn down
    # by the context manager even if scheduling fails.
    with mp.Pool(mp.cpu_count()) as pool:
        pool.apply_async(converttoimage,
                         args=(filepath, app.config['UPLOAD_FOLDER'], basename, '_hin'),
                         callback=capturetext)
        pool.close()
        pool.join()

    filtertext(prefix + '_hin.txt', prefix + '_hin_filtered.txt')
    processenglish(prefix + '_hin_filtered.txt')
    translatewithanuvadaeng(prefix + '_hin_filtered.txt', prefix + '_eng_tran.txt')

    with open(prefix + '_eng_tran.txt', 'r') as eng_file:
        english_res = list(eng_file)
    with open(prefix + '_hin_filtered.txt', 'r') as hin_file:
        hindi_res = list(hin_file)
    data = {'hindi': hindi_res, 'english': english_res}

    translations = []
    for i, hindi_line in enumerate(hindi_res):
        translations.append(Translation(basename=str(basename),
                                        source=hindi_line,
                                        target=english_res[i]))
    if translations:
        # An empty bulk insert is rejected by the driver, so skip it.
        Translation.objects.insert(translations)

    for leftover in glob.glob(prefix + '*'):
        os.remove(leftover)
    res = CustomResponse(Status.SUCCESS.value, data)
    TranslationProcess.objects(basename=basename).update(
        set__status=STATUS_PROCESSED)
    return res.getres()
def update_password_admin():
    """Replace a user's basic-auth password and optionally set the high court.

    JSON body keys: ``user_id``, ``high_court_code`` and ``new_password``.
    When ``high_court_code`` is not None the user's ``Userhighcourt`` record
    is updated or created. The password must have at least 6 characters;
    otherwise ``Status.ERROR_WEAK_PASSWORD`` is returned. The existing
    credential is then deactivated through the gateway and a new one is
    created with the user's current roles.
    """
    log.info('update_password : started')
    body = request.get_json()
    user_id = body['user_id']
    high_court_code = body['high_court_code']
    new_password = body['new_password']
    # str() because the code may legitimately be None.
    log.info("high_court_code == " + str(high_court_code))
    if high_court_code is not None:
        userHighCourt = Userhighcourt.objects(user_id=user_id)
        if userHighCourt is not None and len(userHighCourt) > 0:
            log.info('high court with user exist ' + str(len(userHighCourt)))
            userHighCourt.update(set__high_court_code=high_court_code)
        else:
            log.info('saving high court with user')
            user_high_court = Userhighcourt(high_court_code=high_court_code,
                                            user_id=user_id)
            user_high_court.save()

    # Reject unusable passwords before the old credential is deactivated;
    # otherwise a weak or missing password would leave the user with no
    # active credential at all.
    if new_password is None or len(new_password) < 6:
        log.info(
            'update_password : password is too weak, at least provide 6 characters'
        )
        res = CustomResponse(Status.ERROR_WEAK_PASSWORD.value, None)
        return res.getres()

    # The parsed profile is not used further; the call is kept because it
    # fails early when the profile service does not return JSON.
    profile = requests.get(PROFILE_REQ_URL + user_id).content
    profile = json.loads(profile)
    roles_ = get_user_roles_basic_auth(user_id)

    deactivate_url = GATEWAY_SERVER_URL + 'credentials/basic-auth/' + user_id + '/status'
    response = requests.put(deactivate_url, json={"status": "false"})
    status = response.json()['status']
    log.info("status == " + str(status))
    if status != 'Deactivated':
        res = CustomResponse(Status.ERROR_GATEWAY.value, None)
        return res.getres()

    data = {
        "credential": {
            "password": new_password,
            "scopes": roles_
        },
        "consumerId": user_id,
        "type": "basic-auth"
    }
    requests.post(GATEWAY_SERVER_URL + 'credentials', json=data)
    res = CustomResponse(Status.SUCCESS.value, None)
    return res.getres()
def batchsentences():
    """Move the sentences of one corpus into newly created batch corpora.

    Reads ``basename`` from the query string. When more than ``batch_size``
    sentences carry that basename, the loop creates one ``Corpus`` per
    iteration (with a fresh UUID basename) and rewrites the ``basename`` of
    every sentence in the selected slice to that UUID.

    NOTE(review): the response is always built with ``Status.FAILURE``,
    including when batching ran - confirm whether that is intended.
    """
    basename = request.args.get('basename')
    current_time = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
    sentences = Sentence.objects(basename=basename)
    # NOTE(review): corpus_obj is never used below.
    corpus_obj = Corpus.objects(basename=basename)
    # `index` only feeds the human-readable batch name; numbering starts at 2.
    index = 2
    batch_size = 10000
    if len(sentences) > batch_size:
        for i in range(2, 1 + math.ceil(len(sentences) / batch_size)):
            base = str(uuid.uuid4())
            if (i) * batch_size > len(sentences):
                # NOTE(review): on this branch the stop index
                # (i - 1) * batch_size - len(sentences) is negative - confirm
                # how the queryset slice is meant to treat it.
                sentence_batch = sentences[0:(i - 1) * batch_size - len(sentences)]
                print(len(sentence_batch))
                if len(sentence_batch) > 0:
                    corpus = Corpus(source_lang='English', target_lang='Hindi', status=STATUS_PROCESSED,
                                    name='SC Judgment 2019 Batch ' + str(index), domain='LAW',
                                    created_on=current_time, last_modified=current_time, author='',
                                    comment='', no_of_sentences=len(sentence_batch), basename=base)
                    corpus.save()
                    for sentence in sentence_batch:
                        # Each sentence is re-fetched by id and updated through
                        # a queryset rather than on the loaded document.
                        sentence_dict = json.loads(sentence.to_json())
                        sen = Sentence.objects(_id=sentence_dict['_id']['$oid'])
                        print(sen.to_json())
                        sen.update(set__basename=base)
            else:
                # Always slices from offset 0; presumably relies on already
                # re-tagged sentences dropping out of the basename filter -
                # TODO confirm.
                sentence_batch = sentences[0:batch_size]
                print(len(sentence_batch))
                if len(sentence_batch) > 0:
                    corpus = Corpus(source_lang='English', target_lang='Hindi', status=STATUS_PROCESSED,
                                    name='SC Judgment 2019 Batch ' + str(index), domain='LAW',
                                    created_on=current_time, last_modified=current_time, author='',
                                    comment='', no_of_sentences=len(sentence_batch), basename=base)
                    corpus.save()
                    for sentence in sentence_batch:
                        sentence_dict = json.loads(sentence.to_json())
                        sen = Sentence.objects(_id=sentence_dict['_id']['$oid'])
                        print(sen.to_json())
                        sen.update(set__basename=base)
            index += 1
    res = CustomResponse(Status.FAILURE.value, basename)
    return res.getres()
def upload_corpus():
    """Publish every sentence pair of an upload request to the corpus topic.

    The JSON body supplies ``lang_1``, ``lang_2``, ``created_by``, the domain
    and a ``sentences`` list of ``{'src': ..., 'tar': ...}`` objects; the
    parallel corpus id is optional and defaults to an empty string. One
    message per pair is sent to ``TOPIC_CORPUS_CREATION``.
    """
    started_ms = int(round(time.time() * 1000))
    log.info('upload_corpus: started at ' + str(started_ms))
    created_date = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
    req_data = request.get_json()
    validate_upload_corpus_request(req_data)

    lang_1 = req_data['lang_1']
    lang_2 = req_data['lang_2']
    created_by = req_data['created_by']
    sentences = req_data['sentences']
    domain = req_data[constants.DOMAIN]
    no_of_sentences = len(sentences)

    try:
        parallel_corpus_id = req_data[constants.parallel_corpus_id]
    except Exception:
        parallel_corpus_id = ''
        log.info('upload_corpus : parallel corpus id is NOT present')

    for pair in sentences:
        payload = {
            constants.text_lang_1: pair['src'],
            constants.text_lang_2: pair['tar'],
            constants.lang_1: lang_1,
            constants.lang_2: lang_2,
            constants.parallel_corpus_id: parallel_corpus_id,
            constants.created_by: created_by,
            constants.created_date: created_date,
            constants.DOMAIN: domain
        }
        producer.send(TOPIC_CORPUS_CREATION, value={'data': payload})
    producer.flush()

    res = CustomResponse(Status.SUCCESS.value,
                         'no. of sentences are ' + str(no_of_sentences))
    finished_ms = int(round(time.time() * 1000))
    log.info('upload_corpus: ended at ' + str(finished_ms) +
             'total time elapsed = ' + str(finished_ms - started_ms))
    return res.getres()
def save_student_masterdata():
    """Create a ``Student`` record from the JSON body.

    Expects ``{'student': {'student_id': ..., 'student_name': ...}}``.
    Returns the missing-parameters response (with its HTTP status) when
    ``student`` or ``student_id`` is absent or None, and the
    user-already-exists response when the id is already stored.
    """
    body = request.get_json()
    # .get() so an absent key yields the missing-parameters response rather
    # than an unhandled KeyError.
    student_data = body.get('student') if isinstance(body, dict) else None
    student_id = student_data.get('student_id') if isinstance(student_data, dict) else None
    if student_id is None:
        res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return res.getres(), Status.ERR_GLOBAL_MISSING_PARAMETERS.value['http']['status']

    student_fromdb = Student.objects(student_id=student_id)
    if student_fromdb is not None and len(student_fromdb) > 0:
        res = CustomResponse(Status.USER_ALREADY_EXISTS.value, None)
        return res.getres(), Status.USER_ALREADY_EXISTS.value['http']['status']

    student = Student(student_id=student_id,
                      student_name=student_data['student_name'])
    student.save()
    res = CustomResponse(Status.SUCCESS.value, None)
    return res.getres()
def post(self):
    """Store an uploaded file under a generated name and register it.

    The upload is accepted only if its extension ends with one of
    ``ALLOWED_FILE_EXTENSIONS`` and the stored file is at most 20 MiB;
    otherwise a 400 response is returned (an oversized file is removed
    again). Accepted files are recorded in ``UserFiles`` for the user in the
    ``ad-userid`` header and the generated file name is returned.
    """
    parser = reqparse.RequestParser()
    parser.add_argument('file', type=werkzeug.datastructures.FileStorage,
                        location='files', help='File is required', required=True)
    uploaded = parser.parse_args()['file']

    file_real_name, file_extension = os.path.splitext(uploaded.filename)
    filename = str(uuid.uuid4()) + file_extension
    filepath = os.path.join(config.download_folder, filename)

    if not any(file_extension.endswith(allowed) for allowed in ALLOWED_FILE_EXTENSIONS):
        return CustomResponse(Status.ERROR_UNSUPPORTED_FILE.value, None).getresjson(), 400

    # The size can only be measured once the upload is on disk.
    uploaded.save(filepath)
    size_in_mb = os.stat(filepath).st_size / (1024 * 1024)
    if size_in_mb > 20:
        os.remove(filepath)
        return CustomResponse(Status.ERROR_FILE_SIZE.value, None).getresjson(), 400

    UserFiles(created_by=request.headers.get('ad-userid'),
              filename=filename,
              file_real_name=file_real_name + file_extension,
              created_on=datetime.now()).save()
    return CustomResponse(Status.SUCCESS.value, filename).getres()
def translateFile():
    """Run image conversion and ALTO text capture for an uploaded PDF.

    A ``TranslationProcess`` record is created in ``STATUS_PROCESSING``, the
    upload is saved as ``<timestamp>.pdf``, ``converttoimage`` runs in a
    worker process with ``capturealtotext`` as its callback, and the record
    is then marked ``STATUS_PROCESSED``.
    """
    basename = str(int(time.time()))
    current_time = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
    uploaded = request.files['file']
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], basename + '.pdf')
    translationProcess = TranslationProcess(
        status=STATUS_PROCESSING, name=uploaded.filename,
        created_on=current_time, basename=basename)
    translationProcess.save()
    uploaded.save(filepath)

    # Create the pool only after the request has been validated and let the
    # context manager tear it down, so a failing request cannot leak workers.
    with mp.Pool(mp.cpu_count()) as pool:
        pool.apply_async(converttoimage,
                         args=(filepath, app.config['UPLOAD_FOLDER'], basename, ''),
                         callback=capturealtotext)
        pool.close()
        pool.join()

    res = CustomResponse(Status.SUCCESS.value, '')
    TranslationProcess.objects(basename=basename).update(
        set__status=STATUS_PROCESSED)
    return res.getres()
def create_user_basic_auth():
    """Create a gateway user with basic-auth and OAuth credentials.

    JSON body keys: ``username``, ``firstname``, ``lastname``, ``password``,
    ``roles`` and ``high_court_code``. If the profile service reports an
    active profile for the username, ``Status.USER_ALREADY_EXISTS`` is
    returned. Otherwise the user, its credentials and scopes are created via
    ``shell`` and, when a high court code is given, a ``Userhighcourt``
    record is saved. Any failure yields ``Status.ERROR_GATEWAY``.
    """
    log.info('create_user_basic_auth : started')
    body = request.get_json()
    user_name = body['username']
    firstname = body['firstname']
    lastname = body['lastname']
    password = body['password']
    scope = body['roles']
    high_court_code = body['high_court_code']
    try:
        profile = requests.get(PROFILE_REQ_URL + user_name)
        try:
            profile = profile.json()
            if profile['isActive']:
                log.info('create_user_oauth : profile is = : ' + str(profile))
                res = CustomResponse(Status.USER_ALREADY_EXISTS.value, None)
                return res.getres()
        except Exception:
            # Best effort: an unreadable profile is treated as "no active
            # user". Narrowed from a bare except so interpreter-exit signals
            # are no longer swallowed.
            pass
        log.info('here')
        shell.create_user(user_name, firstname, lastname)
        log.info('user created')
        shell.create_basic_auth_credentials(user_name, password)
        log.info('basic auth created')
        response = shell.create_oauth(user_name)
        log.info('oauth created')
        user = shell.get_user_info(user_name)
        log.info(str(user))
        shell.scope_add(user['id'], scope)
        time.sleep(3)
        log.info('scope added')
        if high_court_code is not None:
            user_high_court = Userhighcourt(high_court_code=high_court_code,
                                            user_id=user['id'])
            user_high_court.save()
        res = CustomResponse(Status.SUCCESS.value, response)
        return res.getres()
    except Exception as e:
        log.info(' create_user : error ' + str(e))
        res = CustomResponse(Status.ERROR_GATEWAY.value, None)
        return res.getres()
def post(self):
    """Save the translated document for a record and return the output files.

    JSON body keys: ``record_id`` and ``user_id``. A missing or None value
    yields the missing-parameters response with HTTP 400. A ``ServiceError``
    from ``document_saving`` yields ``Status.OPERATION_NOT_PERMITTED`` with
    HTTP 400.
    """
    body = request.get_json()
    log_info("request received", MODULE_CONTEXT)
    try:
        # .get() instead of indexing: the keys used to be read before their
        # presence was checked, so a missing key raised an unhandled
        # KeyError instead of the 400 response below.
        record_id = body.get('record_id') if isinstance(body, dict) else None
        user_id = body.get('user_id') if isinstance(body, dict) else None
        if record_id is None or user_id is None:
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
            return res.getresjson(), 400
        out_translated_doc, xlsx_file, txt_file = document_saving(
            record_id, user_id, DOWNLOAD_FOLDER)
        log_info("document saved successfully", MODULE_CONTEXT)
        res = CustomResponse(Status.SUCCESS.value, out_translated_doc, xlsx_file, txt_file)
        return res.getres()
    except ServiceError as e:
        log_exception("Error occured at resource level.", MODULE_CONTEXT, e)
        res = CustomResponse(Status.OPERATION_NOT_PERMITTED.value, None)
        return res.getresjson(), 400
def create_parallel_corpus():
    """Create a target corpus and a parallel corpus for an existing source corpus.

    JSON body keys: ``source_corpus`` (id of the source ``Singlecorpus``),
    ``name``, ``domain``, ``target_lang`` and ``source_lang``. Every original
    sentence tagged with the source corpus gets the new parallel corpus
    basename appended to its ``parallelcorpusid`` list.

    Returns the missing-parameters response when ``source_corpus`` is absent
    or empty and the data-not-found response when no such source corpus
    exists; in both cases nothing is written.
    """
    body = request.get_json()
    source = body.get('source_corpus') if isinstance(body, dict) else None
    if source is None or len(source) == 0:
        res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return res.getres(), Status.ERR_GLOBAL_MISSING_PARAMETERS.value['http']['status']

    basename = str(uuid.uuid4())
    target_corpus_id = str(uuid.uuid4())
    name = body['name']
    domain = body['domain']
    target_lang = body['target_lang']
    source_lang = body['source_lang']

    # Verify the source corpus before anything is saved; the target corpus
    # used to be written first and was left orphaned on this error path.
    source_corpus = Singlecorpus.objects(corpusid=source)
    if source_corpus is None or len(source_corpus) == 0:
        res = CustomResponse(Status.DATA_NOT_FOUND.value, None)
        return res.getres(), Status.DATA_NOT_FOUND.value['http']['status']

    corpus = Singlecorpus(status=STATUS_ACTIVE, created_on=datetime.now(),
                          name=name, corpusid=target_corpus_id, domain=domain,
                          lang=target_lang,
                          created_by=request.headers.get('ad-userid'))
    corpus.tags = [BASE_CORPUS, target_lang]
    corpus.save()

    parallel_corpus = Parallelcorpus(source_lang=source_lang,
                                     target_lang=target_lang, name=name,
                                     domain=domain, basename=basename,
                                     source_id=source,
                                     target_id=target_corpus_id,
                                     status=STATUS_ACTIVE)
    parallel_corpus.save()

    source_sentences = Corpussentence.objects(
        Q(tags=source) & Q(original=True))
    for source_sentence in source_sentences:
        source_sentence_dict = json.loads(source_sentence.to_json())
        parallel_ids = source_sentence_dict['parallelcorpusid']
        parallel_ids.append(basename)
        source_sentence.parallelcorpusid = parallel_ids
        source_sentence.save()
    res = CustomResponse(Status.SUCCESS.value, None)
    return res.getres()
def post(self):
    """Convert the file named in the JSON body to PDF and register the result.

    A name already ending in ``.pdf`` is echoed back unchanged. Otherwise the
    file under ``config.download_folder`` is converted, copied to
    ``<uuid>.pdf`` in the same folder and recorded in ``UserFiles`` for the
    user in the ``ad-userid`` header. Conversion failures and timeouts are
    re-raised as ``InternalServerErrorError``.
    """
    body = request.get_json()
    upload_id = str(uuid4())
    filename = body['filename']
    # NOTE(review): `filename` comes straight from the request and is joined
    # into a filesystem path without validation - confirm this is intended.
    filepath = os.path.join(config.download_folder, filename)
    if filename.endswith('.pdf'):
        return CustomResponse(Status.SUCCESS.value, filename).getres()

    try:
        converted = convert_to(
            os.path.join(config.download_folder, 'pdf', upload_id),
            filepath, timeout=60)
        copyfile(converted,
                 os.path.join(config.download_folder, upload_id + '.pdf'))
        UserFiles(created_by=request.headers.get('ad-userid'),
                  filename=upload_id + '.pdf',
                  created_on=datetime.now()).save()
    except LibreOfficeError:
        raise InternalServerErrorError(
            {'message': 'Error when converting file to PDF'})
    except TimeoutExpired:
        raise InternalServerErrorError(
            {'message': 'Timeout when converting file to PDF'})
    return CustomResponse(Status.SUCCESS.value, upload_id + '.pdf').getres()
def get(self):
    """Send a stored file to the user who uploaded it.

    Query arguments: ``filename`` and ``userid`` (both required). The file is
    sent as an attachment only when a ``UserFiles`` record links it to that
    user and it exists in ``config.download_folder``; otherwise a
    file-not-found response with HTTP 400 is returned.
    """
    parser = reqparse.RequestParser()
    parser.add_argument('filename', type=str, location='args',
                        help='Filename is required', required=True)
    parser.add_argument('userid', type=str, location='args',
                        help='UserId is required', required=True)
    args = parser.parse_args()
    filename = args['filename']
    userid = args['userid']
    filepath = os.path.join(config.download_folder, filename)

    owned = UserFiles.objects(filename=filename, created_by=userid)
    # Both failure paths answer identically, so they share one guard.
    if owned is None or len(owned) == 0 or not os.path.exists(filepath):
        return CustomResponse(Status.ERROR_NOTFOUND_FILE.value, None).getresjson(), 400

    result = send_file(filepath, as_attachment=True)
    result.headers["x-suggested-filename"] = filename
    return result
def post(self):
    """Persist the blocks of every page in the request as ``FileContent`` rows.

    JSON body keys: ``pages`` (list of page dicts) and
    ``process_identifier``; the user id comes from the ``userid`` header. A
    missing key or header yields the missing-parameters response with HTTP
    400. For each page, every block list found under a ``BLOCK_TYPES`` key is
    turned into save-ready dicts by ``self.make_obj``.
    """
    body = request.get_json()
    userid = request.headers.get('userid')
    if (not isinstance(body, dict) or 'pages' not in body
            or 'process_identifier' not in body or userid is None):
        res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return res.getresjson(), 400

    results = body['pages'] or []
    process_identifier = body['process_identifier']
    obj_to_be_saved = []
    for result in results:
        page_data = {
            'page_no': result['page_no'],
            'page_width': result['page_width'],
            'page_height': result['page_height'],
        }
        for block_type in BLOCK_TYPES:
            key = block_type['key']
            # A page need not carry every block type; an absent key is
            # treated like an explicit None instead of raising KeyError.
            blocks = result.get(key)
            if blocks is not None:
                for data in blocks:
                    obj_to_be_saved = self.make_obj(
                        process_identifier, page_data, data, key,
                        obj_to_be_saved, userid)

    if obj_to_be_saved:
        # An empty bulk insert is rejected by the driver, so skip it.
        FileContent.objects.insert(
            [FileContent(**data) for data in obj_to_be_saved])
    res = CustomResponse(Status.SUCCESS.value, None)
    return res.getres()
def saveTranslateDocx():
    """Store an uploaded translated ``.docx`` for a translation process.

    Form field ``basename`` names the process; the upload is saved as
    ``<basename>_u.docx`` or, if that exists, as the first free
    ``<basename>_<n>_u.docx``. The process is flagged
    ``translate_uploaded`` and the name of the stored file is returned.
    A missing ``basename`` yields the missing-parameters response.
    """
    start_time = int(round(time.time() * 1000))
    log.info('uploadTranslateDocx: started at ' + str(start_time))
    # getlist() returns an empty list for an absent field, so emptiness is
    # the condition to test.
    basenames = request.form.getlist('basename')
    if not basenames:
        res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return res.getres(), Status.ERR_GLOBAL_MISSING_PARAMETERS.value['http']['status']
    basename = basenames[0]
    f = request.files['file']

    filename = basename + '_u.docx'
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    index = 0
    while os.path.exists(filepath):
        filename = basename + '_' + str(index) + '_u.docx'
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        index = index + 1
    f.save(filepath)

    # Report the name actually written; the old code rebuilt it from the
    # already-advanced counter and so never matched the stored file.
    res = CustomResponse(Status.SUCCESS.value, filename)
    translationProcess = TranslationProcess.objects(basename=basename)
    translationProcess.update(set__translate_uploaded=True)
    log.info('uploadTranslateDocx: ended at ' + str(getcurrenttime()) +
             'total time elapsed : ' + str(getcurrenttime() - start_time))
    return res.getres()
def download_docx():
    """Send a ``.docx`` from the upload folder with a friendly download name.

    Query argument ``filename`` has the form ``<basename>[_...].docx``. The
    download name is the ``name`` of the matching ``TranslationProcess``
    (with a ``_translated`` suffix when the file name contains an
    underscore); if no process is found the file is sent as
    ``default.docx``. A missing filename yields the missing-parameters
    response; an unreadable file, or a name that resolves outside the upload
    folder, yields the data-not-found response.
    """
    log.info('download-docx: started')
    filename = request.args.get('filename')
    if not filename:
        return CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                              'filename missing').getres()
    try:
        # The name is caller-controlled: refuse anything that escapes the
        # upload folder (absolute paths, '..' segments).
        upload_root = os.path.abspath('upload/')
        requested = os.path.abspath(os.path.join('upload/', filename))
        if os.path.commonpath([upload_root, requested]) != upload_root:
            return CustomResponse(Status.DATA_NOT_FOUND.value,
                                  'file not found').getres()

        filename_without_docx = filename.split('.docx')[0]
        n_filename = filename_without_docx.split('_')
        try:
            log.info('download-docx: finding process from basename : ' +
                     str(n_filename[0]))
            translationProcess = TranslationProcess.objects(
                basename=n_filename[0])
            # Indexing fails when no process matches; the handler below then
            # falls back to the default download name.
            data = translationProcess[0]['name']
            if len(n_filename) > 1:
                data = data.split('.docx')[0] + '_translated.docx'
            log.info(
                'download-docx: process found for basename with name = ' +
                str(data))
            result = flask.send_file(os.path.join('upload/', filename),
                                     as_attachment=True,
                                     attachment_filename=data)
            result.headers["x-suggested-filename"] = data
        except Exception:
            log.info(
                'download-docx: error in finding process for basename : ' +
                str(n_filename))
            result = flask.send_file(os.path.join('upload/', filename),
                                     as_attachment=True,
                                     attachment_filename="default.docx")
            result.headers["x-suggested-filename"] = filename
        return result
    except Exception:
        return CustomResponse(Status.DATA_NOT_FOUND.value,
                              'file not found').getres()
def getfiledata():
    """Extract the English text of an uploaded PDF and send it back as a file.

    The upload is saved as ``<timestamp>.pdf``, converted and captured in a
    worker process, then filtered and processed. The resulting
    ``<timestamp>_eng_filtered.txt`` is sent as an attachment with the
    suggested name ``<timestamp>.txt``.
    """
    basename = str(int(time.time()))
    uploaded = request.files['file']
    prefix = app.config['UPLOAD_FOLDER'] + '/' + basename
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], basename + '.pdf')
    uploaded.save(filepath)

    # Create the pool only after the request has been validated and let the
    # context manager tear it down, so a failing request cannot leak workers.
    with mp.Pool(mp.cpu_count()) as pool:
        pool.apply_async(converttoimage,
                         args=(filepath, app.config['UPLOAD_FOLDER'], basename, '_eng'),
                         callback=capturetext)
        pool.close()
        pool.join()

    filtertext(prefix + '_eng.txt', prefix + '_eng_filtered.txt')
    processenglish(prefix + '_eng_filtered.txt')

    # The filtered text used to be read into a list and wrapped in a response
    # object that were never returned; only the file itself is sent.
    result = flask.send_file(
        os.path.join('upload/', basename + '_eng_filtered.txt'),
        as_attachment=True)
    result.headers["x-suggested-filename"] = basename + '.txt'
    return result
def upload_file_law():
    """Accept a Hindi and an English PDF and build an aligned law corpus.

    The uploads ``hindi`` and ``english`` are saved as
    ``<timestamp>_hin.pdf`` / ``<timestamp>_eng.pdf``, converted and captured
    in worker processes, and handed to ``process_files_law`` under the corpus
    name ``OLD_LAW_CORPUS``. Any failure yields the global system error
    response with its HTTP status.
    """
    basename = str(int(time.time()))
    try:
        hindi_upload = request.files['hindi']
        english_upload = request.files['english']
        filepath = os.path.join(
            app.config['UPLOAD_FOLDER'], basename + '_hin.pdf')
        filepath_eng = os.path.join(
            app.config['UPLOAD_FOLDER'], basename + '_eng.pdf')
        hindi_upload.save(filepath)
        english_upload.save(filepath_eng)

        # The pool lives inside the try block and is torn down by the context
        # manager, so a failing request cannot leak worker processes.
        with mp.Pool(mp.cpu_count()) as pool:
            pool.apply_async(converttoimage,
                             args=(filepath, app.config['UPLOAD_FOLDER'], basename, '_hin'),
                             callback=capturetext)
            pool.apply_async(converttoimage,
                             args=(filepath_eng, app.config['UPLOAD_FOLDER'], basename, '_eng'),
                             callback=capturetext)
            pool.close()
            pool.join()
        return process_files_law(basename, 'OLD_LAW_CORPUS')
    except Exception as e:
        # Routed through the file's logger rather than stdout.
        log.info('upload_file_law : error ' + str(e))
        res = CustomResponse(Status.ERR_GLOBAL_SYSTEM.value, None)
        return res.getres(), Status.ERR_GLOBAL_SYSTEM.value['http']['status']
def update_sentences_status():
    """Set the status of each sentence listed in the JSON body.

    Expects ``{'sentences': [{'_id': {'$oid': ...}, 'status': ...}, ...]}``.
    Returns the missing-parameters response (with its HTTP status) when
    ``sentences`` is absent or not a list.
    """
    body = request.get_json()
    # .get() so an absent key yields the missing-parameters response rather
    # than an unhandled KeyError.
    sentences = body.get('sentences') if isinstance(body, dict) else None
    if not isinstance(sentences, list):
        res = CustomResponse(
            Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return res.getres(), Status.ERR_GLOBAL_MISSING_PARAMETERS.value['http']['status']
    for sentence in sentences:
        Sentence.objects(_id=sentence['_id']['$oid']).update(
            set__status=sentence['status'])
    res = CustomResponse(Status.SUCCESS.value, None)
    return res.getres()
def save_ocr_data():
    """Store the ``response`` part of the posted OCR data.

    Expects ``{'ocr_data': {'response': ...}}``. The record's ``created_on``
    is the current Unix timestamp as a string. Returns the
    missing-parameters response (with its HTTP status) when ``ocr_data`` or
    its ``response`` key is absent.
    """
    body = request.get_json()
    # Validate the whole path that is read below, so a malformed body gets
    # the missing-parameters response rather than an unhandled KeyError.
    ocr_payload = body.get('ocr_data') if isinstance(body, dict) else None
    if not isinstance(ocr_payload, dict) or 'response' not in ocr_payload:
        res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return res.getres(), Status.ERR_GLOBAL_MISSING_PARAMETERS.value['http']['status']
    ocr_data = Ocrdata(created_on=str(int(time.time())),
                       data=ocr_payload['response'])
    ocr_data.save()
    res = CustomResponse(Status.SUCCESS.value, None)
    return res.getres()
def get_parallel_corpus_sentences_list():
    """Return the source and target sentences of one parallel corpus.

    Query argument ``basename`` identifies the ``Parallelcorpus``. Sentences
    are selected by the corpus' ``source_id`` / ``target_id`` tag together
    with the parallel corpus basename and ordered by ``index``. A missing
    basename yields the missing-parameters response, an unknown one the
    data-not-found response.
    """
    basename = request.args.get('basename')
    if not basename:
        res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return res.getres(), Status.ERR_GLOBAL_MISSING_PARAMETERS.value['http']['status']

    matches = Parallelcorpus.objects(basename=basename)
    if matches is None or len(matches) == 0:
        res = CustomResponse(Status.DATA_NOT_FOUND.value, None)
        return res.getres(), Status.DATA_NOT_FOUND.value['http']['status']
    corpus_info = json.loads(matches.to_json())[0]

    def sentences_tagged(corpus_id):
        # Sentences of one side of the pair, as plain JSON-derived objects.
        selected = Corpussentence.objects.filter(
            Q(tags=corpus_id) & Q(parallelcorpusid=basename)).order_by('index')
        return json.loads(selected.to_json())

    data = {
        'source': sentences_tagged(corpus_info['source_id']),
        'target': sentences_tagged(corpus_info['target_id'])
    }
    return CustomResponse(Status.SUCCESS.value, data).getres()
def get(self):
    """Return the parsed content of a JSON file from the download folder.

    Query argument ``filename`` (required) is resolved inside
    ``config.download_folder``. A name that resolves outside that folder, or
    a file that does not exist, yields the file-not-found response with
    HTTP 400.
    """
    parse = reqparse.RequestParser()
    parse.add_argument('filename', type=str, location='args',
                       help='Filename is required', required=True)
    args = parse.parse_args()
    filename = args['filename']

    # The name is caller-controlled: only serve paths that stay inside the
    # download folder (blocks absolute paths and '..' segments).
    download_root = os.path.abspath(config.download_folder)
    filepath = os.path.abspath(os.path.join(config.download_folder, filename))
    try:
        inside_root = os.path.commonpath([download_root, filepath]) == download_root
    except ValueError:
        # commonpath raises when the two paths cannot be compared.
        inside_root = False

    if inside_root and os.path.exists(filepath):
        with open(filepath) as json_file:
            data = json.load(json_file)
        res = CustomResponse(Status.SUCCESS.value, data)
        return res.getres()
    res = CustomResponse(Status.ERROR_NOTFOUND_FILE.value, None)
    return res.getresjson(), 400
def create_user_oauth():
    """Create OAuth credentials for the ``username`` given in the JSON body.

    Returns a success response carrying whatever ``shell.create_oauth``
    returned, or ``Status.ERROR_GATEWAY`` when that call (or building the
    response) raises.
    """
    log.info('create_user_oauth : started')
    user_name = request.get_json()['username']
    try:
        oauth_result = shell.create_oauth(user_name)
        return CustomResponse(Status.SUCCESS.value, oauth_result).getres()
    except Exception as e:
        log.info('create_user_oauth : error ' + str(e))
        return CustomResponse(Status.ERROR_GATEWAY.value, None).getres()