Example #1
0
def fetch_sentences():
    """Return one page of a corpus' sentences, each paired with a translation.

    Query arguments read: ``basename``, ``pagesize``, ``status``, ``pageno``.
    Every returned sentence whose status is ``STATUS_PENDING`` is switched to
    ``STATUS_PROCESSING`` through ``Sentence.objects(...).update``. The target
    language is taken from the first corpus matching ``basename`` and falls
    back to ``'en'`` when that corpus is missing or has no ``target_lang``.

    Returns:
        The result of ``CustomResponse.getres()`` built from the sentence
        dicts (each with an added ``translation`` key) and the total count
        reported by ``Sentence.limit``.
    """
    basename = request.args.get('basename')
    sentencesobj, totalcount = Sentence.limit(
        request.args.get('pagesize'), basename, request.args.get('status'),
        request.args.get('pageno'))
    corpus_dict = json.loads(Corpus.objects(basename=basename).to_json())
    sentences_list = []
    if sentencesobj is not None:
        sources = []
        for sent in sentencesobj:
            sent_dict = json.loads(sent.to_json())
            if sent_dict['status'] == STATUS_PENDING:
                Sentence.objects(_id=sent_dict['_id']['$oid']).update(
                    set__status=STATUS_PROCESSING)
            sources.append(sent_dict['source'])
        # An unknown basename yields an empty list; indexing it used to raise
        # IndexError, so "no corpus" is treated like "no target language".
        corpus_info = corpus_dict[0] if corpus_dict else {}
        target_lang = 'en'
        if corpus_info.get('target_lang') is not None:
            target_lang = LANGUAGES[corpus_info['target_lang']]
        translation_list = translatesinglesentence(sources, target_lang)
        for index, sent in enumerate(sentencesobj):
            sent_dict = json.loads(sent.to_json())
            sent_dict['translation'] = translation_list[index]
            sentences_list.append(sent_dict)
    res = CustomResponse(Status.SUCCESS.value, sentences_list, totalcount)
    return res.getres()
Example #2
0
def fetch_corpus():
    """Return all ``Corpus`` documents, logging which user asked for them.

    The caller is identified by the ``ad-userid`` request header; when the
    header is absent the request is logged as anonymous.
    """
    caller = request.headers.get('ad-userid')
    if caller is None:
        log.info('fetch_corpus: initiated by anonymous user')
    else:
        log.info('fetch_corpus: initiated by ' + caller)
    corpora = json.loads(Corpus.objects.to_json())
    return CustomResponse(Status.SUCCESS.value, corpora).getres()
Example #3
0
def process_files_law(basename, name):
    """Filter, align and store one Hindi/English file pair as sentences.

    Works on ``<UPLOAD_FOLDER>/<basename>_hin.txt`` and ``..._eng.txt``: both
    are filtered and processed, the Hindi text is machine translated, and
    ``helpers/bleualign.py`` aligns the two sides. Each aligned pair is scored
    with ``fetchwordsfromsentence`` and inserted as a ``Sentence`` with status
    ``STATUS_PENDING``. All ``<basename>*`` files in the upload folder are
    removed afterwards.

    Args:
        basename: File-name prefix of the working files.
        name: Value stored in the ``basename`` field of every new ``Sentence``.

    Returns:
        The result of ``CustomResponse.getres()`` carrying the aligned lines
        and their scores.
    """
    # Local import so this block does not depend on the module's import list.
    import subprocess

    prefix = app.config['UPLOAD_FOLDER'] + '/' + basename
    filtertext(prefix + '_hin.txt', prefix + '_hin_filtered.txt')
    filtertext(prefix + '_eng.txt', prefix + '_eng_filtered.txt')
    processhindi(prefix + '_hin_filtered.txt')
    processenglish(prefix + '_eng_filtered.txt')
    translatewithgoogle(prefix + '_hin_filtered.txt', prefix + '_eng_tran.txt')

    # Pass an argument list instead of a concatenated shell string, so a
    # basename containing spaces or shell metacharacters cannot alter the
    # command. As before, the aligner's exit status is not checked.
    align_prefix = os.getcwd() + '/upload/' + basename
    subprocess.run([
        './helpers/bleualign.py',
        '-s', align_prefix + '_hin_filtered.txt',
        '-t', align_prefix + '_eng_filtered.txt',
        '--srctotarget', align_prefix + '_eng_tran.txt',
        '-o', align_prefix + '_output',
    ])

    english_res = []
    english_points = []
    english_points_words = []
    with open(prefix + '_output-t', 'r') as f_eng:
        for line in f_eng:
            english_res.append(line)
            point = fetchwordsfromsentence(line, basename)
            english_points.append(point['avg'])
            english_points_words.append(point['values'])

    hindi_res = []
    hindi_points = []
    hindi_points_words = []
    with open(prefix + '_output-s', 'r') as f_hin:
        for line in f_hin:
            hindi_res.append(line)
            point = fetchwordsfromsentence(line, basename)
            hindi_points.append(point['avg'])
            hindi_points_words.append(point['values'])

    data = {'hindi': hindi_res, 'english': english_res,
            'english_scores': english_points, 'hindi_scores': hindi_points}

    sentences = []
    for i, hindi_line in enumerate(hindi_res):
        # Each target line is split on ':::::'; the first part is stored as
        # the target text and the second as the alignment accuracy.
        english_parts = english_res[i].split(':::::')
        sentences.append(Sentence(
            status=STATUS_PENDING, alignment_accuracy=english_parts[1],
            basename=name, source=hindi_line, target=english_parts[0],
            source_ocr_words=hindi_points_words[i],
            source_ocr=str(hindi_points[i]),
            target_ocr_words=english_points_words[i],
            target_ocr=str(english_points[i])))
    Sentence.objects.insert(sentences)

    for leftover in glob.glob(prefix + '*'):
        os.remove(leftover)
    res = CustomResponse(Status.SUCCESS.value, data)
    return res.getres()
Example #4
0
def fetch_translation_process():
    """Return the caller's translation processes, newest basename first.

    The caller is identified by the ``ad-userid`` request header. If the
    lookup or serialisation fails, a ``Status.FAILURE`` response is returned;
    previously ``res`` was left unbound and the final ``return`` raised.
    """
    log.info('fetch_translation_process : started at ' + str(getcurrenttime()))
    try:
        processes = TranslationProcess.objects(
            created_by=request.headers.get('ad-userid')).order_by(
            '-basename').to_json()
        res = CustomResponse(Status.SUCCESS.value, json.loads(processes))
    except Exception as e:
        log.info('fetch-translation-process : ERROR occured : ' + str(e))
        res = CustomResponse(Status.FAILURE.value, None)
    log.info('fetch_translation_process : ended at ' + str(getcurrenttime()))
    return res.getres()
Example #5
0
def update_corpus():
    """Validate an update-corpus request body and acknowledge it.

    The JSON body is passed to ``validate_upload_corpus_request``; no other
    processing happens in this function. Start and end timestamps (in
    milliseconds) are logged.
    """
    start_time = int(round(time.time() * 1000))
    log.info('update_corpus: started at ' + str(start_time))
    req_data = request.get_json()
    validate_upload_corpus_request(req_data)
    res = CustomResponse(Status.SUCCESS.value, 'no. of sentences are ')
    end_time = int(round(time.time() * 1000))
    # The closing log line used to name 'upload_corpus' and ran the timestamp
    # straight into the next phrase.
    log.info('update_corpus: ended at ' + str(end_time) +
             ' total time elapsed = ' + str(end_time - start_time))
    return res.getres()
Example #6
0
def remove_junk():
    """Delete every ``Sentence`` whose source equals a line of the upload.

    The uploaded ``file`` is saved as ``<timestamp>_junk.txt`` in the upload
    folder and read back line by line. Each line, including its trailing
    newline, is used as an exact ``source`` match for deletion.
    """
    basename = str(int(time.time()))
    uploaded = request.files['file']
    junk_path = os.path.join(
        app.config['UPLOAD_FOLDER'], basename + '_junk.txt')
    uploaded.save(junk_path)
    # Context manager guarantees the handle is closed; it used to stay open.
    with open(junk_path, 'r') as junk_file:
        for line in junk_file:
            Sentence.objects(source=line).delete()
    res = CustomResponse(Status.SUCCESS.value, None)
    return res.getres()
Example #7
0
def delete_process():
    """Delete the ``TranslationProcess`` named by the ``processname`` form field.

    Returns:
        A ``Status.SUCCESS`` response carrying the basename, or a
        ``Status.FAILURE`` response if reading the field or deleting failed.
    """
    log.info('delete_process: started at ' + str(getcurrenttime()))
    # Bound up front so the error path can report it even when the form
    # lookup itself is what failed (it used to raise NameError there).
    basename = None
    try:
        basename = request.form.getlist('processname')[0]
        log.info('delte_process : requested basename is : ' + basename)
        TranslationProcess.objects(basename=basename).delete()
        log.info('delete_process: ended at ' + str(getcurrenttime()))
        res = CustomResponse(Status.SUCCESS.value, basename)
    except Exception:
        log.info('delte_process : ERROR while processing  basename  : ' + str(basename))
        res = CustomResponse(Status.FAILURE.value, basename)
    return res.getres()
def convert_to_pdf():
    """Convert the file named in the JSON body to PDF inside ``NGINX_FOLDER``.

    The conversion output is copied to ``<NGINX_FOLDER>/<uuid>.pdf`` and that
    file name is returned in the response.

    Raises:
        InternalServerErrorError: when ``convert_to`` raises
            ``LibreOfficeError`` or ``TimeoutExpired``.
    """
    payload = request.get_json()
    upload_id = str(uuid4())
    source_path = os.path.join(NGINX_FOLDER, payload['filename'])
    work_dir = os.path.join(NGINX_FOLDER, 'pdf', upload_id)
    try:
        converted = convert_to(work_dir, source_path, timeout=15)
        copyfile(converted, os.path.join(NGINX_FOLDER, upload_id+'.pdf'))
    except LibreOfficeError:
        raise InternalServerErrorError({'message': 'Error when converting file to PDF'})
    except TimeoutExpired:
        raise InternalServerErrorError({'message': 'Timeout when converting file to PDF'})
    return CustomResponse(Status.SUCCESS.value, upload_id+'.pdf').getres()
Example #9
0
def translate():
    """Convert, filter and translate an uploaded PDF, then store the pairs.

    The upload is saved as ``<timestamp>.pdf`` and a ``TranslationProcess`` is
    recorded with ``STATUS_PROCESSING``. ``converttoimage`` runs in a worker
    pool with ``capturetext`` as its callback. The resulting text is filtered,
    processed and translated, and each source/target line pair is inserted as
    a ``Translation``. The working files are then removed and the process is
    marked ``STATUS_PROCESSED``.

    Returns:
        The result of ``CustomResponse.getres()`` with the ``hindi`` and
        ``english`` line lists.
    """
    basename = str(int(time.time()))
    current_time = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
    uploaded = request.files['file']
    upload_dir = app.config['UPLOAD_FOLDER']
    filepath = os.path.join(upload_dir, basename + '.pdf')
    translationProcess = TranslationProcess(
        status=STATUS_PROCESSING, name=uploaded.filename,
        created_on=current_time, basename=basename)
    translationProcess.save()
    uploaded.save(filepath)

    # Create the pool only once the request has been accepted, and always
    # release it: previously a missing 'file' part leaked the workers.
    pool = mp.Pool(mp.cpu_count())
    try:
        pool.apply_async(converttoimage, args=(
            filepath, upload_dir, basename, '_hin'), callback=capturetext)
    finally:
        pool.close()
        pool.join()

    prefix = upload_dir + '/' + basename
    filtertext(prefix + '_hin.txt', prefix + '_hin_filtered.txt')
    processenglish(prefix + '_hin_filtered.txt')
    translatewithanuvadaeng(prefix + '_hin_filtered.txt',
                            prefix + '_eng_tran.txt')

    with open(prefix + '_eng_tran.txt', 'r') as f_eng:
        english_res = list(f_eng)
    with open(prefix + '_hin_filtered.txt', 'r') as f_hin:
        hindi_res = list(f_hin)
    data = {'hindi': hindi_res, 'english': english_res}

    # Lines are paired by position; a shorter target file still raises
    # IndexError as before rather than being truncated silently.
    translations = [
        Translation(basename=str(basename), source=source_line,
                    target=english_res[i])
        for i, source_line in enumerate(hindi_res)
    ]
    Translation.objects.insert(translations)

    for leftover in glob.glob(prefix + '*'):
        os.remove(leftover)
    res = CustomResponse(Status.SUCCESS.value, data)
    TranslationProcess.objects(basename=basename).update(
        set__status=STATUS_PROCESSED)
    return res.getres()
Example #10
0
def update_password_admin():
    """Update a user's high-court mapping and basic-auth password.

    JSON body keys: ``user_id`` (required), ``high_court_code`` and
    ``new_password`` (both may be absent or ``None``).

    Steps:
      1. If ``high_court_code`` is given, update the existing
         ``Userhighcourt`` record for the user or create one.
      2. Reject a password shorter than 6 characters before any gateway call.
      3. Deactivate the current basic-auth credential through the gateway.
      4. If a password was supplied, post the new credential.

    Returns:
        The result of ``CustomResponse.getres()`` with ``SUCCESS``,
        ``ERROR_WEAK_PASSWORD`` or ``ERROR_GATEWAY``.
    """
    log.info('update_password : started')
    body = request.get_json()
    user_id = body['user_id']
    high_court_code = body.get('high_court_code')
    new_password = body.get('new_password')
    # str() because the code may be None; plain concatenation raised TypeError.
    log.info("high_court_code == " + str(high_court_code))
    if high_court_code is not None:
        userHighCourt = Userhighcourt.objects(user_id=user_id)
        if userHighCourt is not None and len(userHighCourt) > 0:
            log.info('high court with user exist ' + str(len(userHighCourt)))
            userHighCourt.update(set__high_court_code=high_court_code)
        else:
            log.info('saving high court with user')
            user_high_court = Userhighcourt(high_court_code=high_court_code,
                                            user_id=user_id)
            user_high_court.save()
    # The profile is fetched and parsed but not used further; the call is kept
    # because a failing profile service or invalid JSON still aborts here.
    profile = requests.get(PROFILE_REQ_URL + user_id).content
    profile = json.loads(profile)
    roles_ = get_user_roles_basic_auth(user_id)

    # Validate before touching the gateway: the check used to run only after
    # the existing credential had already been deactivated.
    if new_password is not None and len(new_password) < 6:
        log.info(
            'update_password : password is too weak, at least provide 6 characters'
        )
        res = CustomResponse(Status.ERROR_WEAK_PASSWORD.value, None)
        return res.getres()

    data = {"status": "false"}
    req = GATEWAY_SERVER_URL + 'credentials/basic-auth/' + user_id + '/status'
    response = requests.put(req, json=data)
    res = response.json()
    status = res['status']
    log.info("status == " + str(status))
    if not status == 'Deactivated':
        res = CustomResponse(Status.ERROR_GATEWAY.value, None)
        return res.getres()

    # NOTE(review): with no password the credential stays deactivated and
    # SUCCESS is returned (the old code crashed on None at this point) --
    # confirm this is the intended outcome.
    if new_password is not None:
        data = {
            "credential": {
                "password": new_password,
                "scopes": roles_
            },
            "consumerId": user_id,
            "type": "basic-auth"
        }
        req = GATEWAY_SERVER_URL + 'credentials'
        response = requests.post(req, json=data)
    res = CustomResponse(Status.SUCCESS.value, None)
    return res.getres()
Example #11
0
def batchsentences():
    """Move sentences of one corpus into newly created batch corpora.

    Reads the ``basename`` query argument and loads the matching ``Sentence``
    records. Nothing is changed unless there are more than ``batch_size``
    (10000) of them. Otherwise each loop pass creates a ``Corpus`` with a
    fresh UUID basename and sets that basename on every sentence of the
    selected slice.

    Returns:
        ``CustomResponse(Status.FAILURE.value, basename).getres()`` on every
        path.
        NOTE(review): FAILURE is returned even after batches were created --
        confirm whether SUCCESS was intended.
    """
    basename = request.args.get('basename')
    current_time = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
    sentences = Sentence.objects(basename=basename)
    # Not used below.
    corpus_obj = Corpus.objects(basename=basename)
    # Batch number used in the generated corpus names; starts at 2.
    index = 2
    batch_size = 10000
    if len(sentences) > batch_size:
        # i runs from 2 up to and including ceil(len(sentences) / batch_size).
        for i in range(2, 1 + math.ceil(len(sentences) / batch_size)):
            # Basename of the corpus created in this pass.
            base = str(uuid.uuid4())
            if (i) * batch_size > len(sentences):
                # NOTE(review): in this branch (i - 1) * batch_size is smaller
                # than len(sentences), so the stop index is negative --
                # confirm the query set accepts it and selects the intended
                # sentences.
                sentence_batch = sentences[0:(i - 1) * batch_size - len(sentences)]
                print(len(sentence_batch))
                if len(sentence_batch) > 0:
                    corpus = Corpus(source_lang='English', target_lang='Hindi', status=STATUS_PROCESSED,
                                    name='SC Judgment 2019 Batch ' + str(index), domain='LAW', created_on=current_time,
                                    last_modified=current_time, author='', comment='',
                                    no_of_sentences=len(sentence_batch),
                                    basename=base)
                    corpus.save()

                    # Re-select each sentence by id and point it at the new corpus.
                    for sentence in sentence_batch:
                        sentence_dict = json.loads(sentence.to_json())
                        sen = Sentence.objects(_id=sentence_dict['_id']['$oid'])
                        print(sen.to_json())
                        sen.update(set__basename=base)
            else:
                # NOTE(review): always slices from 0; presumably relies on
                # sentences moved in earlier passes no longer matching
                # `basename` when the query set is sliced again -- verify.
                sentence_batch = sentences[0:batch_size]
                print(len(sentence_batch))
                if len(sentence_batch) > 0:
                    corpus = Corpus(source_lang='English', target_lang='Hindi', status=STATUS_PROCESSED,
                                    name='SC Judgment 2019 Batch ' + str(index), domain='LAW', created_on=current_time,
                                    last_modified=current_time, author='', comment='',
                                    no_of_sentences=len(sentence_batch),
                                    basename=base)
                    corpus.save()
                    # Re-select each sentence by id and point it at the new corpus.
                    for sentence in sentence_batch:
                        sentence_dict = json.loads(sentence.to_json())
                        sen = Sentence.objects(_id=sentence_dict['_id']['$oid'])
                        print(sen.to_json())
                        sen.update(set__basename=base)
            index += 1
    res = CustomResponse(Status.FAILURE.value, basename)
    return res.getres()
Example #12
0
def upload_corpus():
    """Publish every sentence pair of the request to the corpus-creation topic.

    The JSON body is validated with ``validate_upload_corpus_request``. It
    must provide ``lang_1``, ``lang_2``, ``created_by``, ``sentences`` (a list
    of ``{'src': ..., 'tar': ...}`` objects) and the ``constants.DOMAIN`` key.
    ``constants.parallel_corpus_id`` is optional and defaults to ``''``.

    Returns:
        The result of ``CustomResponse.getres()`` reporting how many
        sentences were received.
    """
    start_time = int(round(time.time() * 1000))
    log.info('upload_corpus: started at ' + str(start_time))
    created_date = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
    req_data = request.get_json()
    validate_upload_corpus_request(req_data)

    lang_1 = req_data['lang_1']
    lang_2 = req_data['lang_2']
    created_by = req_data['created_by']
    # sentence is array of objects {src:'',tar:''}
    sentences = req_data['sentences']
    domain = req_data[constants.DOMAIN]
    no_of_sentences = len(sentences)

    # Optional key: test for it instead of catching a broad Exception.
    if constants.parallel_corpus_id in req_data:
        parallel_corpus_id = req_data[constants.parallel_corpus_id]
    else:
        parallel_corpus_id = ''
        log.info('upload_corpus :  parallel corpus id is NOT present')

    for sen in sentences:
        data = {
            constants.text_lang_1: sen['src'],
            constants.text_lang_2: sen['tar'],
            constants.lang_1: lang_1,
            constants.lang_2: lang_2,
            constants.parallel_corpus_id: parallel_corpus_id,
            constants.created_by: created_by,
            constants.created_date: created_date,
            constants.DOMAIN: domain
        }
        producer.send(TOPIC_CORPUS_CREATION, value={'data': data})
    # One flush after queuing everything, instead of one per message.
    producer.flush()

    res = CustomResponse(Status.SUCCESS.value,
                         'no. of sentences are ' + str(no_of_sentences))
    end_time = int(round(time.time() * 1000))
    log.info('upload_corpus: ended at ' + str(end_time) +
             ' total time elapsed = ' + str(end_time - start_time))
    return res.getres()
Example #13
0
def save_student_masterdata():
    """Create a ``Student`` from the ``student`` object of the JSON body.

    Returns:
        * the missing-parameters response and its HTTP status when
          ``student`` or ``student.student_id`` is ``None``;
        * the user-already-exists response and its HTTP status when a student
          with that id is already stored;
        * otherwise the plain success response after saving the new student.
    """
    payload = request.get_json()
    if payload['student'] is None or payload['student']['student_id'] is None:
        missing = Status.ERR_GLOBAL_MISSING_PARAMETERS.value
        return CustomResponse(missing, None).getres(), missing['http']['status']

    student_id = payload['student']['student_id']
    existing = Student.objects(student_id=student_id)
    if existing is not None and len(existing) > 0:
        duplicate = Status.USER_ALREADY_EXISTS.value
        return CustomResponse(duplicate, None).getres(), duplicate['http']['status']

    new_student = Student(student_id=payload['student']['student_id'],
                          student_name=payload['student']['student_name'])
    new_student.save()
    return CustomResponse(Status.SUCCESS.value, None).getres()
Example #14
0
 def post(self):
     """Store an uploaded file under a generated name and register it.

     The ``file`` form part is required. Its extension must end with one of
     ``ALLOWED_FILE_EXTENSIONS`` and the saved file must not be larger than
     20 MiB; otherwise a 400 response is returned (an oversized file is
     removed again). On success a ``UserFiles`` record is saved and the
     generated file name is returned.
     """
     parser = reqparse.RequestParser()
     parser.add_argument('file', type=werkzeug.datastructures.FileStorage, location='files',help='File is required', required=True)
     upload = parser.parse_args()['file']

     real_name, extension = os.path.splitext(upload.filename)
     stored_name = str(uuid.uuid4())+extension
     stored_path = os.path.join(config.download_folder, stored_name)

     extension_ok = any(extension.endswith(allowed)
                        for allowed in ALLOWED_FILE_EXTENSIONS)
     if not extension_ok:
         return CustomResponse(Status.ERROR_UNSUPPORTED_FILE.value, None).getresjson(), 400

     upload.save(stored_path)
     # The size is only known after the file is on disk.
     size_mb = os.stat(stored_path).st_size/(1024*1024)
     if size_mb > 20:
         os.remove(stored_path)
         return CustomResponse(Status.ERROR_FILE_SIZE.value, None).getresjson(), 400

     record = UserFiles(created_by=request.headers.get('ad-userid'),
                        filename=stored_name,
                        file_real_name=real_name+extension,
                        created_on=datetime.now())
     record.save()
     return CustomResponse(Status.SUCCESS.value, stored_name).getres()
Example #15
0
def translateFile():
    """Save an uploaded PDF and run ``converttoimage`` on it in a worker pool.

    A ``TranslationProcess`` is recorded with ``STATUS_PROCESSING`` before the
    conversion and switched to ``STATUS_PROCESSED`` once the pool has
    finished. ``capturealtotext`` is passed as the conversion callback.

    Returns:
        The result of ``CustomResponse.getres()`` with an empty payload.
    """
    basename = str(int(time.time()))
    current_time = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
    uploaded = request.files['file']
    upload_dir = app.config['UPLOAD_FOLDER']
    filepath = os.path.join(upload_dir, basename + '.pdf')
    translationProcess = TranslationProcess(
        status=STATUS_PROCESSING, name=uploaded.filename,
        created_on=current_time, basename=basename)
    translationProcess.save()
    uploaded.save(filepath)

    # Create the pool only once the request has been accepted, and always
    # release it: previously a missing 'file' part leaked the workers.
    pool = mp.Pool(mp.cpu_count())
    try:
        pool.apply_async(converttoimage, args=(
            filepath, upload_dir, basename, ''), callback=capturealtotext)
    finally:
        pool.close()
        pool.join()

    res = CustomResponse(Status.SUCCESS.value, '')
    TranslationProcess.objects(basename=basename).update(
        set__status=STATUS_PROCESSED)
    return res.getres()
Example #16
0
def create_user_basic_auth():
    """Create a gateway user with basic-auth and OAuth credentials.

    JSON body keys: ``username``, ``firstname``, ``lastname``, ``password``,
    ``roles`` (required) and ``high_court_code`` (may be absent or ``None``).

    If the profile service reports an active profile for the user name, a
    ``USER_ALREADY_EXISTS`` response is returned. Otherwise the user, its
    credentials and its scopes are created through ``shell``, and a
    ``Userhighcourt`` record is saved when a high-court code was given.

    Returns:
        The result of ``CustomResponse.getres()`` with ``SUCCESS`` (carrying
        the ``shell.create_oauth`` result), ``USER_ALREADY_EXISTS``, or
        ``ERROR_GATEWAY`` when any step raises.
    """
    log.info('create_user_basic_auth : started')
    body = request.get_json()
    user_name = body['username']
    firstname = body['firstname']
    lastname = body['lastname']
    password = body['password']
    scope = body['roles']
    high_court_code = body.get('high_court_code')

    try:
        profile_response = requests.get(PROFILE_REQ_URL + user_name)
        # Only an unparseable or unexpectedly shaped profile means "no such
        # user". A bare `except: pass` used to swallow every error here.
        try:
            profile = profile_response.json()
            is_active = bool(profile['isActive'])
        except (ValueError, KeyError, TypeError):
            profile = None
            is_active = False
        if is_active:
            log.info('create_user_oauth : profile is = : ' + str(profile))
            res = CustomResponse(Status.USER_ALREADY_EXISTS.value, None)
            return res.getres()

        log.info('here')
        shell.create_user(user_name, firstname, lastname)
        log.info('user created')
        shell.create_basic_auth_credentials(user_name, password)
        log.info('basic auth created')
        response = shell.create_oauth(user_name)
        log.info('oauth created')
        user = shell.get_user_info(user_name)
        log.info(str(user))
        shell.scope_add(user['id'], scope)
        # NOTE(review): fixed 3 s pause kept as-is; presumably waits for the
        # scope change to take effect -- confirm it is still needed.
        time.sleep(3)
        log.info('scope added')
        if high_court_code is not None:
            user_high_court = Userhighcourt(high_court_code=high_court_code,
                                            user_id=user['id'])
            user_high_court.save()
        res = CustomResponse(Status.SUCCESS.value, response)
        return res.getres()

    except Exception as e:
        log.info(' create_user : error ' + str(e))
        res = CustomResponse(Status.ERROR_GATEWAY.value, None)
        return res.getres()
Example #17
0
 def post(self):
     """Save the translated document for a record and return its file names.

     JSON body keys: ``record_id`` and ``user_id``; both are required and
     must not be ``None``.

     Returns:
         * the missing-parameters response with HTTP 400 when either key is
           absent or ``None``;
         * the success response carrying the three values returned by
           ``document_saving``;
         * the operation-not-permitted response with HTTP 400 when
           ``document_saving`` raises ``ServiceError``.
     """
     body = request.get_json() or {}
     log_info("request received", MODULE_CONTEXT)
     try:
         # .get() so a missing key reaches the 400 branch below; indexing
         # used to raise KeyError before the check could run.
         record_id = body.get('record_id')
         user_id = body.get('user_id')
         if record_id is None or user_id is None:
             res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,None)
             return res.getresjson(), 400
         out_translated_doc, xlsx_file, txt_file = document_saving(record_id, user_id, DOWNLOAD_FOLDER)
         log_info("document saved successfully", MODULE_CONTEXT)
         res = CustomResponse(Status.SUCCESS.value, out_translated_doc, xlsx_file, txt_file)
         return res.getres()
     except ServiceError as e:
         log_exception("Error occured at resource level.", MODULE_CONTEXT, e)
         res = CustomResponse(Status.OPERATION_NOT_PERMITTED.value,None)
         return res.getresjson(), 400
Example #18
0
def create_parallel_corpus():
    """Create a target corpus and a parallel corpus linked to a source corpus.

    JSON body keys: ``source_corpus`` (id of an existing ``Singlecorpus``),
    ``name``, ``domain``, ``target_lang`` and ``source_lang``.

    The new parallel corpus' basename is appended to ``parallelcorpusid`` of
    every original ``Corpussentence`` tagged with the source corpus id.

    Returns:
        * the missing-parameters response and its HTTP status when
          ``source_corpus`` is ``None`` or empty;
        * the data-not-found response and its HTTP status when the source
          corpus does not exist;
        * otherwise the plain success response.
    """
    body = request.get_json()
    if body['source_corpus'] is None or len(body['source_corpus']) == 0:
        res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return res.getres(
        ), Status.ERR_GLOBAL_MISSING_PARAMETERS.value['http']['status']
    source = body['source_corpus']
    name = body['name']
    domain = body['domain']
    target_lang = body['target_lang']
    source_lang = body['source_lang']

    # Verify the source corpus before writing anything: the target corpus
    # used to be saved first and was left orphaned on DATA_NOT_FOUND.
    source_corpus = Singlecorpus.objects(corpusid=source)
    if source_corpus is None or len(source_corpus) == 0:
        res = CustomResponse(Status.DATA_NOT_FOUND.value, None)
        return res.getres(), Status.DATA_NOT_FOUND.value['http']['status']

    basename = str(uuid.uuid4())
    target_corpus_id = str(uuid.uuid4())
    corpus = Singlecorpus(status=STATUS_ACTIVE,
                          created_on=datetime.now(),
                          name=name,
                          corpusid=target_corpus_id,
                          domain=domain,
                          lang=target_lang,
                          created_by=request.headers.get('ad-userid'))
    corpus.tags = [BASE_CORPUS, target_lang]
    corpus.save()

    parallel_corpus = Parallelcorpus(source_lang=source_lang,
                                     target_lang=target_lang,
                                     name=name,
                                     domain=domain,
                                     basename=basename,
                                     source_id=source,
                                     target_id=target_corpus_id,
                                     status=STATUS_ACTIVE)
    parallel_corpus.save()

    source_sentences = Corpussentence.objects(
        Q(tags=source) & Q(original=True))
    for source_sentence in source_sentences:
        # The current id list is read from the JSON form of the document.
        source_sentence_dict = json.loads(source_sentence.to_json())
        parallel_ids = source_sentence_dict['parallelcorpusid']
        parallel_ids.append(basename)
        source_sentence.parallelcorpusid = parallel_ids
        source_sentence.save()
    res = CustomResponse(Status.SUCCESS.value, None)
    return res.getres()
Example #19
0
 def post(self):
     """Convert the file named in the JSON body to PDF and register the result.

     A name that already ends with ``.pdf`` is returned unchanged. Otherwise
     the conversion output is copied to ``<download_folder>/<uuid>.pdf``, a
     ``UserFiles`` record is saved for it, and the new name is returned.

     Raises:
         InternalServerErrorError: when the conversion raises
             ``LibreOfficeError`` or ``TimeoutExpired``.
     """
     payload = request.get_json()
     upload_id = str(uuid4())
     filename = payload['filename']
     source_path = os.path.join(config.download_folder, filename)
     if filename.endswith('.pdf'):
         return CustomResponse(Status.SUCCESS.value, filename).getres()

     work_dir = os.path.join(config.download_folder, 'pdf', upload_id)
     try:
         converted = convert_to(work_dir, source_path, timeout=60)
         target_path = os.path.join(config.download_folder, upload_id + '.pdf')
         copyfile(converted, target_path)
         record = UserFiles(created_by=request.headers.get('ad-userid'),
                            filename=upload_id + '.pdf',
                            created_on=datetime.now())
         record.save()
     except LibreOfficeError:
         raise InternalServerErrorError(
             {'message': 'Error when converting file to PDF'})
     except TimeoutExpired:
         raise InternalServerErrorError(
             {'message': 'Timeout when converting file to PDF'})
     return CustomResponse(Status.SUCCESS.value, upload_id + '.pdf').getres()
Example #20
0
 def get(self):
     """Send a stored file as an attachment if it belongs to the given user.

     Query arguments ``filename`` and ``userid`` are both required. The file
     is sent only when a ``UserFiles`` record matches both values and the
     file exists in the download folder. Otherwise the file-not-found
     response is returned with HTTP 400.
     """
     parser = reqparse.RequestParser()
     parser.add_argument('filename',
                         type=str,
                         location='args',
                         help='Filename is required',
                         required=True)
     parser.add_argument('userid',
                         type=str,
                         location='args',
                         help='UserId is required',
                         required=True)
     query = parser.parse_args()
     filename = query['filename']
     owner = query['userid']
     filepath = os.path.join(config.download_folder, filename)
     records = UserFiles.objects(filename=filename, created_by=owner)

     # Both failure cases produce the same response, so they share one exit.
     if records is None or len(records) == 0 or not os.path.exists(filepath):
         return CustomResponse(Status.ERROR_NOTFOUND_FILE.value, None).getresjson(), 400

     attachment = send_file(filepath, as_attachment=True)
     attachment.headers["x-suggested-filename"] = filename
     return attachment
Example #21
0
 def post(self):
     """Persist the blocks of every submitted page as ``FileContent`` records.

     The JSON body must contain ``pages`` and ``process_identifier``, and the
     ``userid`` request header must be present; otherwise the
     missing-parameters response is returned with HTTP 400. For each page,
     every non-``None`` entry under a ``BLOCK_TYPES`` key is passed through
     ``self.make_obj``. The accumulated objects are inserted in one call.
     """
     payload = request.get_json()
     userid = request.headers.get('userid')
     has_required = ('pages' in payload and 'process_identifier' in payload
                     and userid is not None)
     if not has_required:
         return CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                               None).getresjson(), 400

     pages = payload['pages']
     process_identifier = payload['process_identifier']
     pending = []
     for page in pages:
         page_data = {
             'page_no': page['page_no'],
             'page_width': page['page_width'],
             'page_height': page['page_height'],
         }
         for block_type in BLOCK_TYPES:
             blocks = page[block_type['key']]
             if blocks is None:
                 continue
             for block in blocks:
                 pending = self.make_obj(
                     process_identifier, page_data, block,
                     block_type['key'], pending, userid)

     FileContent.objects.insert([FileContent(**fields) for fields in pending])
     return CustomResponse(Status.SUCCESS.value, None).getres()
def saveTranslateDocx():
    """Store an uploaded translated .docx for an existing translation process.

    Form field ``basename`` names the process; the ``file`` part is saved as
    ``<basename>_u.docx`` in the upload folder, or as
    ``<basename>_<n>_u.docx`` with the first free ``n`` when that name is
    taken. The process is then flagged with ``translate_uploaded=True``.

    Returns:
        * the missing-parameters response and its HTTP status when no
          ``basename`` field was sent;
        * otherwise the success response carrying the file name that was
          actually written.
    """
    start_time = int(round(time.time() * 1000))
    log.info('uploadTranslateDocx: started at ' + str(start_time))
    # getlist() returns a list; an empty one used to slip past the old check
    # and fail on [0].
    basenames = request.form.getlist('basename')
    if not basenames:
        res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return res.getres(
        ), Status.ERR_GLOBAL_MISSING_PARAMETERS.value['http']['status']
    basename = basenames[0]
    uploaded = request.files['file']
    upload_dir = app.config['UPLOAD_FOLDER']

    saved_name = basename + '_u.docx'
    index = 0
    while os.path.exists(os.path.join(upload_dir, saved_name)):
        saved_name = basename + '_' + str(index) + '_u.docx'
        index = index + 1
    uploaded.save(os.path.join(upload_dir, saved_name))

    # Report the name that was written. The response used to be rebuilt from
    # the post-increment index and so named a file that was never saved.
    res = CustomResponse(Status.SUCCESS.value, saved_name)
    translationProcess = TranslationProcess.objects(basename=basename)
    translationProcess.update(set__translate_uploaded=True)

    log.info('uploadTranslateDocx: ended at ' + str(getcurrenttime()) +
             'total time elapsed : ' + str(getcurrenttime() - start_time))
    return res.getres()
def download_docx():
    """Send a .docx from the ``upload/`` folder as an attachment.

    The ``filename`` query argument selects the file. The part before the
    first ``_`` is used to look up the ``TranslationProcess`` whose ``name``
    becomes the suggested download name (with ``_translated.docx`` appended
    when the file name has a ``_`` suffix). If that lookup fails, the file is
    still sent under the name ``default.docx``.

    Returns:
        * the missing-parameters response when ``filename`` is absent or
          empty;
        * the data-not-found response when the name contains a path
          separator or the file cannot be sent;
        * otherwise the file response.
    """
    log.info('download-docx: started')
    filename = request.args.get('filename')
    # `not filename` also covers a missing argument (None), which `== ''`
    # let through.
    if not filename:
        return CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                              'filename missing').getres()
    # The name comes from the query string: refuse anything that could step
    # outside the upload folder.
    if os.path.basename(filename) != filename or '\\' in filename:
        return CustomResponse(Status.DATA_NOT_FOUND.value,
                              'file not found').getres()
    try:
        filename_without_docx = filename.split('.docx')[0]
        n_filename = filename_without_docx.split('_')
        try:
            log.info('download-docx: finding process from basename : ' +
                     str(n_filename[0]))
            translationProcess = TranslationProcess.objects(
                basename=n_filename[0])
            if translationProcess is not None:
                data = translationProcess[0]['name']
                if len(n_filename) > 1:
                    data = data.split('.docx')[0] + '_translated.docx'
                log.info(
                    'download-docx: process found for basename with name = ' +
                    str(data))
                result = flask.send_file(os.path.join('upload/', filename),
                                         as_attachment=True,
                                         attachment_filename=data)
                result.headers["x-suggested-filename"] = data
        except Exception:
            # No usable process record: fall back to a generic download name.
            log.info(
                'download-docx: error in finding process for basename : ' +
                str(n_filename))
            result = flask.send_file(os.path.join('upload/', filename),
                                     as_attachment=True,
                                     attachment_filename="default.docx")
            result.headers["x-suggested-filename"] = filename
        return result
    except Exception:
        return CustomResponse(Status.DATA_NOT_FOUND.value,
                              'file not found').getres()
Example #24
0
def getfiledata():
    """Run the English text-extraction steps on an uploaded PDF.

    The uploaded ``file`` part is saved under ``UPLOAD_FOLDER`` as
    ``<basename>.pdf``, where ``<basename>`` is the current Unix time in
    whole seconds. ``converttoimage`` is then run in a worker process with
    ``capturetext`` as its completion callback, and once the pool has
    finished ``filtertext`` and ``processenglish`` are applied to the
    ``<basename>_eng*.txt`` files.

    Returns:
        The ``flask.send_file`` response for ``<basename>_eng_filtered.txt``
        with an ``x-suggested-filename`` header of ``<basename>.txt``.
    """
    # NOTE(review): a seconds-resolution basename means two uploads in the
    # same second share file names — confirm this cannot happen in practice.
    basename = str(int(time.time()))
    upload_folder = app.config['UPLOAD_FOLDER']
    filepath = os.path.join(upload_folder, basename + '.pdf')
    # Read and store the upload before any worker processes are started, so
    # a missing form field does not leave an orphaned pool behind.
    request.files['file'].save(filepath)

    pool = mp.Pool(mp.cpu_count())
    try:
        pool.apply_async(converttoimage, args=(
            filepath, upload_folder, basename, '_eng'), callback=capturetext)
        pool.close()
        pool.join()
    finally:
        # Harmless after a completed join(); releases the workers if
        # scheduling or joining raised.
        pool.terminate()

    raw_text_path = upload_folder + '/' + basename + '_eng.txt'
    filtered_text_path = upload_folder + '/' + basename + '_eng_filtered.txt'
    filtertext(raw_text_path, filtered_text_path)
    processenglish(filtered_text_path)

    # NOTE(review): the file is produced under UPLOAD_FOLDER but served from
    # the literal 'upload/' path — presumably the same directory; confirm.
    result = flask.send_file(
        os.path.join('upload/', basename + '_eng_filtered.txt'),
        as_attachment=True)
    result.headers["x-suggested-filename"] = basename + '.txt'
    return result
Example #25
0
def upload_file_law():
    """Store an uploaded Hindi/English PDF pair and start law-corpus processing.

    The ``hindi`` and ``english`` file parts are saved under
    ``UPLOAD_FOLDER`` as ``<basename>_hin.pdf`` and ``<basename>_eng.pdf``,
    where ``<basename>`` is the current Unix time in whole seconds.
    ``converttoimage`` is run for each file in a worker pool with
    ``capturetext`` as the completion callback, and once both tasks have
    finished the result of ``process_files_law(basename, 'OLD_LAW_CORPUS')``
    is returned.

    Returns:
        Whatever ``process_files_law`` returns, or on any exception an
        ``ERR_GLOBAL_SYSTEM`` response together with its HTTP status.
    """
    basename = str(int(time.time()))
    try:
        upload_folder = app.config['UPLOAD_FOLDER']
        # Look up both parts before saving either, so a missing part does
        # not leave a half-written pair on disk.
        f = request.files['hindi']
        f_eng = request.files['english']
        filepath = os.path.join(upload_folder, basename + '_hin.pdf')
        filepath_eng = os.path.join(upload_folder, basename + '_eng.pdf')
        f.save(filepath)
        f_eng.save(filepath_eng)

        # The pool is created only once the inputs are on disk and is always
        # released, even if scheduling or joining raises.
        pool = mp.Pool(mp.cpu_count())
        try:
            pool.apply_async(converttoimage, args=(
                filepath, upload_folder, basename, '_hin'),
                callback=capturetext)
            pool.apply_async(converttoimage, args=(
                filepath_eng, upload_folder, basename, '_eng'),
                callback=capturetext)
            pool.close()
            pool.join()
        finally:
            pool.terminate()
        return process_files_law(basename, 'OLD_LAW_CORPUS')
    except Exception as e:
        # Use the module logger (as the other handlers do) instead of print.
        log.info('upload_file_law : error ' + str(e))
        res = CustomResponse(Status.ERR_GLOBAL_SYSTEM.value, None)
        return res.getres(), Status.ERR_GLOBAL_SYSTEM.value['http']['status']
Example #26
0
def update_sentences_status():
    """Set the status of every sentence listed in the JSON request body.

    The body is expected to be an object with a ``sentences`` list; each
    entry is read as ``entry['_id']['$oid']`` (the sentence to update) and
    ``entry['status']`` (the value to store).

    Returns:
        A ``SUCCESS`` response, or an ``ERR_GLOBAL_MISSING_PARAMETERS``
        response with its HTTP status when the body is not an object or
        ``sentences`` is absent, null, or not a list.
    """
    body = request.get_json()
    # Use .get() on a verified dict so an absent key or a non-object body
    # yields the missing-parameters response instead of an unhandled error.
    sentences = body.get('sentences') if isinstance(body, dict) else None
    if not isinstance(sentences, list):
        res = CustomResponse(
            Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return res.getres(), Status.ERR_GLOBAL_MISSING_PARAMETERS.value['http']['status']
    for sentence in sentences:
        corpus = Sentence.objects(_id=sentence['_id']['$oid'])
        corpus.update(set__status=sentence['status'])
    res = CustomResponse(Status.SUCCESS.value, None)
    return res.getres()
Example #27
0
def save_ocr_data():
    """Persist the OCR payload from the JSON request body as an ``Ocrdata`` record.

    The body is expected to be an object whose ``ocr_data`` entry is itself
    an object with a ``response`` key; that value is stored as ``data``
    and the current Unix time (whole seconds, as a string) as ``created_on``.

    Returns:
        A ``SUCCESS`` response, or an ``ERR_GLOBAL_MISSING_PARAMETERS``
        response with its HTTP status when ``ocr_data`` or its ``response``
        key is missing.
    """
    body = request.get_json()
    # Guard every level of the lookup so a malformed body produces the
    # missing-parameters response instead of an unhandled error.
    ocr_payload = body.get('ocr_data') if isinstance(body, dict) else None
    if not isinstance(ocr_payload, dict) or 'response' not in ocr_payload:
        res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return res.getres(
        ), Status.ERR_GLOBAL_MISSING_PARAMETERS.value['http']['status']
    ocr_data = Ocrdata(created_on=str(int(time.time())),
                       data=ocr_payload['response'])
    ocr_data.save()
    res = CustomResponse(Status.SUCCESS.value, None)
    return res.getres()
Example #28
0
def get_parallel_corpus_sentences_list():
    """Return the source and target sentences of one parallel corpus.

    Reads the ``basename`` query parameter, looks up the matching
    ``Parallelcorpus`` and fetches the ``Corpussentence`` documents tagged
    with its ``source_id`` and ``target_id`` (both restricted to
    ``parallelcorpusid == basename`` and ordered by ``index``).

    Returns:
        A ``SUCCESS`` response carrying ``{'source': [...], 'target': [...]}``,
        an ``ERR_GLOBAL_MISSING_PARAMETERS`` response with its HTTP status
        when ``basename`` is absent or empty, or a ``DATA_NOT_FOUND``
        response with its HTTP status when no corpus matches.
    """
    basename = request.args.get('basename')
    if not basename:
        missing = Status.ERR_GLOBAL_MISSING_PARAMETERS.value
        return CustomResponse(missing, None).getres(), missing['http']['status']

    parallel_corpus = Parallelcorpus.objects(basename=basename)
    corpus_found = parallel_corpus is not None and len(parallel_corpus) > 0
    if not corpus_found:
        not_found = Status.DATA_NOT_FOUND.value
        return CustomResponse(not_found, None).getres(), not_found['http']['status']

    def _ordered_sentences(tag):
        # Sentences of this corpus carrying the given tag, in index order.
        return Corpussentence.objects.filter(
            Q(tags=tag) & Q(parallelcorpusid=basename)).order_by('index')

    corpus_record = json.loads(parallel_corpus.to_json())[0]
    source_sentences = _ordered_sentences(corpus_record['source_id'])
    target_sentences = _ordered_sentences(corpus_record['target_id'])

    payload = {
        'source': json.loads(source_sentences.to_json()),
        'target': json.loads(target_sentences.to_json())
    }
    return CustomResponse(Status.SUCCESS.value, payload).getres()
Example #29
0
 def get(self):
     """Return the parsed JSON content of a file in the download folder.

     The required ``filename`` query argument is resolved against
     ``config.download_folder``.

     Returns:
         A ``SUCCESS`` response carrying the decoded JSON, or an
         ``ERROR_NOTFOUND_FILE`` JSON response with HTTP status 400 when
         the name does not resolve to a regular file inside the download
         folder.
     """
     parse = reqparse.RequestParser()
     parse.add_argument('filename', type=str, location='args',help='Filename is required', required=True)
     args = parse.parse_args()
     filename = args['filename']
     download_root = os.path.abspath(config.download_folder)
     filepath = os.path.abspath(os.path.join(download_root, filename))
     # ``filename`` is caller-controlled: only serve paths that stay inside
     # the download folder (blocks absolute paths and ``..`` traversal).
     inside_root = filepath.startswith(os.path.join(download_root, ''))
     # isfile() rather than exists(), so a directory name is reported as
     # not found instead of failing inside open().
     if inside_root and os.path.isfile(filepath):
         with open(filepath) as json_file:
             data = json.load(json_file)
         res = CustomResponse(Status.SUCCESS.value, data)
         return res.getres()
     res = CustomResponse(Status.ERROR_NOTFOUND_FILE.value, None)
     return res.getresjson(), 400
Example #30
0
def create_user_oauth():
    """Create OAuth credentials for the user named in the JSON request body.

    Reads ``username`` from the body and passes it to
    ``shell.create_oauth``.

    Returns:
        A ``SUCCESS`` response carrying the value returned by
        ``shell.create_oauth``, or an ``ERROR_GATEWAY`` response when that
        call (or building the success response) raises.
    """
    log.info('create_user_oauth : started')
    user_name = request.get_json()['username']

    try:
        oauth_result = shell.create_oauth(user_name)
        return CustomResponse(Status.SUCCESS.value, oauth_result).getres()
    except Exception as err:
        log.info('create_user_oauth : error ' + str(err))
        return CustomResponse(Status.ERROR_GATEWAY.value, None).getres()