def tilde_decode(s):
    """Decode a bytes-like source buffer and run it through ``untilde``.

    The decoded text must start with ``#``; that marker is dropped before
    the transformation and a ``'# '`` prefix is put back afterwards.  Every
    literal ``tilde`` is temporarily rewritten to ``xilde`` so that
    ``untilde`` does not see it, and restored once ``untilde`` has run.

    Returns a ``(text, len(text))`` tuple.
    """
    decoded = BytesIO(s).read().decode('utf-8')
    assert decoded[0] == '#'

    # Hide literal occurrences of the word while untilde() does its work.
    shielded = decoded[1:].replace('tilde', 'xilde')
    restored = untilde(shielded).replace('xilde', 'tilde')
    result = '# ' + restored

    return result, len(result)
Example #2
0
def tilde_decode(s):
    """Turn an encoded buffer into text processed by ``untilde``.

    Steps: decode ``s`` as UTF-8, check that the first character is ``#``
    and strip it, mask the word ``tilde`` as ``xilde`` around the call to
    ``untilde``, then prepend ``"# "`` to the outcome.

    Returns the resulting string together with its length.
    """
    source_text = BytesIO(s).read().decode("utf-8")
    assert source_text[0] == "#"

    masked = source_text[1:].replace("tilde", "xilde")
    converted = untilde(masked)
    output = "# " + converted.replace("xilde", "tilde")

    return output, len(output)
def predict():
    """Predict an arithmetic expression from the posted payload and solve it.

    Reads the url-safe base64 ``operation`` form field of the current
    request, passes the decoded bytes (wrapped in a ``BytesIO``) to
    ``ConvolutionalNeuralNetwork.predict`` and evaluates the returned
    expression string after two rewrites:

    * every ``x`` becomes ``*``;
    * every ``√`` immediately followed by decimal digits is replaced by the
      square root of that integer.

    A ``√`` that is not followed by digits is left untouched, so evaluating
    the expression then fails with ``SyntaxError``.

    Returns a JSON string with the keys ``operation`` (the raw prediction)
    and ``solution`` (the evaluated result).
    """
    import re

    payload = BytesIO(base64.urlsafe_b64decode(request.form['operation']))
    operation = ConvolutionalNeuralNetwork().predict(payload)
    n_operation = operation.replace('x', '*')

    # Substitute each radical in a single left-to-right pass.  Matching the
    # complete run of digits prevents a short operand (e.g. "√4") from
    # clobbering the prefix of a longer one (e.g. "√49").
    n_operation = re.sub(
        r'√(\d+)',
        lambda match: str(math.sqrt(int(match.group(1)))),
        n_operation)

    return json.dumps({
        'operation': operation,
        # SECURITY: the expression is derived from request data and eval()
        # will execute arbitrary Python.  A dedicated evaluator (the
        # previously referenced calculate_operation(operation)) should
        # replace this call.
        'solution': eval(n_operation)
    })
Example #4
0
                    def writer(row, last):
                        """Write one row of values to ``outfile`` as wrapped text.

                        Scalars (or zero-dimensional arrays) are written on a
                        single line terminated by ``;``.  Other rows are
                        serialised with ``commaspace`` between the values,
                        entries equal to the formatted fill value are shown as
                        ``_``, and when ``last`` is true the final two
                        characters are overwritten with ``semicolon``.  Errors
                        raised while writing go to ``exception_handler``.
                        """
                        indent = startindent + '  '
                        if isscalar(row) or row.ndim == 0:
                            scalar_text = str(row.filled().astype(ndarray))
                            outfile.write(indent + scalar_text + ';\n')
                            return

                        buffer = StringIO()
                        if not ma.getmaskarray(row).all():
                            savetxt(buffer,
                                    ma.filled(row),
                                    fmt,
                                    delimiter=commaspace,
                                    newline=commaspace)
                        else:
                            # Every entry is masked: emit only placeholders.
                            placeholders = b', '.join([b'_'] * row.size)
                            buffer.write(placeholders + b', ')

                        if last:
                            # Step back over the last two characters and
                            # overwrite them with the terminator.
                            buffer.seek(-2, 1)
                            buffer.write(semicolon)

                        buffer.seek(0, 0)
                        text = buffer.read().decode('ASCII')

                        # Entries that were filled in with the fill value are
                        # displayed as "_".
                        filled_entry = fmt % getattr(row, 'fill_value', 0) + ','
                        text = text.replace(filled_entry, '_,')

                        wrapped = textwrap.fill(
                            text,
                            line_length,
                            initial_indent=indent,
                            subsequent_indent=startindent + '    ')
                        try:
                            outfile.write(wrapped)
                            outfile.write('\n')
                        except Exception as e:
                            exception_handler(e, outfile)
Example #5
0
def ungzip_files(filename, where_to=None, fLOG=noLOG, fvalid=None, remove_space=True, unzip=True):
    """
    decompress files from a gzip file

    @param      filename        final gzip file (double compression, extension should something like .zip.gz),
                                or the gzip data itself (``bytes``/``bytearray`` on Python 3, ``bytearray`` on Python 2)
    @param      where_to        destination folder (can be None, the result is a list of tuple)
    @param      fLOG            logging function
    @param      fvalid          function which takes two paths (zip name, local name) and return True if the file
                                must be unzipped, False otherwise, if None, the default answer is True
                                (not used by this function)
    @param      remove_space    remove spaces in created local path (+ ``',()``) (not used by this function)
    @param      unzip           unzip file after gzip
    @return                     list of unzipped files if *unzip* is True; otherwise the name of the
                                decompressed file written next to *filename*, or the decompressed bytes
                                when *filename* was in-memory data

    .. versionadded:: 1.4
    """
    # On Python 2 ``bytes`` is ``str`` and therefore also the type of a path,
    # so only ``bytearray`` can be recognised as in-memory data there.
    if sys.version_info[0] == 2:
        memory_types = (bytearray,)
    else:
        memory_types = (bytes, bytearray)
    in_memory = isinstance(filename, memory_types)
    source = BytesIO(filename) if in_memory else filename

    # The context manager closes the gzip handle even if reading fails.
    with gzip.open(source, 'rb') as f:
        content = f.read()

    if unzip:
        return unzip_files(content, where_to=where_to, fLOG=fLOG)
    if in_memory:
        # No path exists to derive an output name from: hand the data back.
        return content

    filename = filename.replace(".gz", "")
    with open(filename, "wb") as f:
        f.write(content)
    return filename
Example #6
0
def ungzip_files(filename,
                 where_to=None,
                 fLOG=noLOG,
                 fvalid=None,
                 remove_space=True,
                 unzip=True,
                 encoding=None):
    """
    Uncompresses files from a gzip file.

    @param      filename        final gzip file (double compression, extension should something like .zip.gz),
                                or the gzip data itself as ``bytes``
    @param      where_to        destination folder (can be None, the result is a list of tuple)
    @param      fLOG            logging function
    @param      fvalid          function which takes two paths (zip name, local name) and return True if the file
                                must be unzipped, False otherwise, if None, the default answer is True
                                (not used by this function)
    @param      remove_space    remove spaces in created local path (+ ``',()``) (not used by this function)
    @param      unzip           unzip file after gzip (only considered when *encoding* is None)
    @param      encoding        if not None, the gzip content is read as text with this encoding
    @return                     *encoding* is None: list of unzipped files if *unzip* is True, else the name
                                of the file written into *where_to* (only when *filename* is a path and
                                *where_to* is given), else the raw bytes;
                                *encoding* is not None: name of the text file written next to *filename*
                                when it is a path, else the decoded text
    """
    is_file = not isinstance(filename, bytes)
    source = filename if is_file else BytesIO(filename)

    if encoding is None:
        # The context manager closes the gzip handle even if reading fails.
        with gzip.open(source, 'rb') as f:
            content = f.read()
        if unzip:
            try:
                return unzip_files(content, where_to=where_to, fLOG=fLOG)
            except Exception as e:  # pragma: no cover
                # Never dump raw in-memory data into the error message.
                label = filename if is_file else "<in-memory gzip data>"
                raise IOError(
                    "Unable to unzip file '{0}'".format(label)) from e
        if where_to is not None and is_file:
            # An output name can only be derived from a real path.
            target = os.path.split(filename)[-1].replace(".gz", "")
            target = os.path.join(where_to, target)
            with open(target, "wb") as f:
                f.write(content)
            return target
        return content

    # Text mode: honour the requested encoding for reading and for writing.
    with gzip.open(source, 'rt', encoding=encoding) as f:
        content = f.read()
    if is_file:
        target = filename.replace(".gz", "")
        # ``content`` is str here, so the output must be opened in text mode.
        with open(target, "w", encoding=encoding) as f:
            f.write(content)
        return target
    return content
Example #7
0
                    def writer(row, last):
                        """Format ``row`` and append it to ``outfile``.

                        A scalar or zero-dimensional ``row`` produces one line
                        ending in ``;``.  Any other row is rendered with
                        ``fmt`` and ``commaspace``, shows ``_`` in place of
                        values equal to the formatted fill value, is wrapped
                        to ``line_length`` and, if ``last`` is true, gets
                        ``semicolon`` written over its final two characters.
                        """
                        if isscalar(row) or row.ndim == 0:
                            value = str(row.filled().astype(ndarray))
                            outfile.write(startindent + '  ' + value + ';\n')
                            return

                        # An earlier array2string()/set_printoptions() based
                        # formatter used to live here as commented-out code;
                        # the buffer-based version below is the active one.
                        stream = StringIO()
                        fully_masked = ma.getmaskarray(row).all()
                        if fully_masked:
                            stream.write(b', '.join([b'_'] * row.size) + b', ')
                        else:
                            savetxt(stream,
                                    ma.filled(row),
                                    fmt,
                                    delimiter=commaspace,
                                    newline=commaspace)
                        if last:
                            # Overwrite the last two characters written.
                            stream.seek(-2, 1)
                            stream.write(semicolon)
                        stream.seek(0, 0)

                        rendered = stream.read().decode('ASCII')
                        fill_text = fmt % getattr(row, 'fill_value', 0)
                        rendered = rendered.replace(fill_text + ',', '_,')
                        rendered = textwrap.fill(
                            rendered,
                            line_length,
                            initial_indent=startindent + '  ',
                            subsequent_indent=startindent + '    ')
                        try:
                            outfile.write(rendered)
                            outfile.write('\n')
                        except Exception as e:
                            exception_handler(e, outfile)
Example #8
0
'''
# Bytes regex applied to the downloaded wgrib.c below.  It matches a line
# ending in `***\n", n);` (captured as group 1), followed by an indented
# `exit(8);` line and an indented `for`.
BDS_unpack_mod = bytes(
    r'(\*\*\*\\n\", n\);\n)[\t ]+exit\(8\);\n[\t ]+for'.encode('ascii'))
# Fetch and patch wgrib.c only when building extensions and the source file
# is not already present under src/.
if 'build_ext' in sys.argv and not os.path.exists(here + '/src/wgrib.c'):
    print('Downloading wgrib source code...')
    request = urllib.request.urlopen(wgrib_url)
    with open(here + '/src/wgrib.c', 'wb') as wgrib_src:
        # Prefix the `c_main` snippet with `define` and rename `main` to
        # `GRIB_MAIN` inside it.
        src = BytesIO(request.read().replace(
            c_main, define + c_main.replace(b'main', b'GRIB_MAIN')))
        try:
            # Replace the `exit(8);` matched above with `return;`.
            src = re.sub(BDS_unpack_mod, b'\\1\treturn;\tfor', src.getvalue())
        except:
            # NOTE(review): BDS_unpack_mod is already bytes, which has no
            # .encode() on Python 3, so this fallback would itself raise
            # there; it looks like a Python 2 leftover - confirm.  The bare
            # `except` also catches KeyboardInterrupt/SystemExit.
            src = re.sub(bytes(BDS_unpack_mod.encode('ascii')),
                         b'\\1\treturn;\tfor', src.getvalue())
        # Turn every remaining `exit(` call into `return(`.
        src = src.replace(b'exit(', b'return(')

        wgrib_src.write(src)

# wgrib2 sources: skipped on Windows and when src/grib2 already exists.
if 'build_ext' in sys.argv and not isWindows() and not os.path.isdir(
        here + '/src/grib2'):
    tarfilepath = os.path.join(here, 'src', os.path.basename(wgrib2_url))
    # Download the tarball only if it is not already present.
    if not os.path.exists(tarfilepath):
        print('Downloading wgrib2 source code...')
        request = urllib.request.urlopen(wgrib2_url)
        with open(tarfilepath, 'wb') as tgz:
            tgz.write(request.read())
    print('Extracting src/{}...'.format(os.path.basename(tarfilepath)))
    with tarfile.open(tarfilepath, mode='r:gz') as tgz:
        # The visible part of this loop only prints each member name.
        for name in tgz.getnames():
            print('extracting src/{}'.format(name))
Example #9
0
    def pdf(self, fp, csv_row):
        """Collect metadata about one PDF and append it to the CSV report.

        The PDF is read from ``fp`` (handed to ``PDFParser``) and from
        ``self.pdf_path`` (handed to ``PdfFileReader``).  Each analysis step
        runs in its own ``try`` block and inserts ``[header, value]`` pairs
        into ``csv_row`` at fixed indices 4 to 24; when a step fails, the
        exception text is stored in the cell instead of a value.

        Side effects: appends one row (the second element of every
        ``csv_row`` entry) to ``self.report_folder + self.report_name``,
        closes ``fp``, deletes ``self.pdf_path``, prints progress messages
        and logs a completion line through ``utils.logline``.

        :param fp: open file object of the PDF; closed before returning.
        :param csv_row: list of ``[header, value]`` pairs, modified in place.
            Presumably already holds entries 0-3 when called - verify
            against the caller.
        """
        password = ''
        extracted_text = ''
        self.parser = PDFParser(fp)
        # Holds the PDFDocument *class* at this point.
        # NOTE(review): presumably self.load_pdf replaces it with a loaded
        # document instance - confirm.
        self.document_t = PDFDocument
        pf = PdfFileReader
        # Document load and encryption check (columns 4 and 5)
        try:
            i = 0
            try:
                # Load the document in a worker thread and wait for it for
                # at most 90 seconds.
                # NOTE(review): Thread.join(timeout=...) does not raise when
                # the timeout expires, so the handler below only runs if
                # creating or starting the thread fails.
                thread = Thread(target=self.load_pdf,
                                args=(PDFDocument, password))
                thread.start()
                thread.join(timeout=90)
            except Exception as e:
                print('PDF I/O error: ' + e.__str__())
                # Failure row: line number, error message, then empty cells.
                row = [
                    self.line_count,
                    'PDF DOCUMENT OBJECT FAILED TO LOAD - ' + e.__str__() +
                    ': ' + self.url,
                    '',
                    '',
                    '',
                    '',
                    '',
                    '',
                    '',
                    '',
                    '',
                    '',
                    '',
                    '',
                    '',
                    '',
                    '',
                    '',
                    '',
                    '',
                    '',
                    '',
                    '',
                    '',
                    '',
                ]
                # self.line_count += 1
                report_path = self.report_folder + self.report_name
                # 90 SECONDS or LOAD FAIL
                with open(report_path, 'a', encoding='utf8',
                          newline='') as csv_file:
                    writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
                    # NOTE(review): str.replace returns a new string and the
                    # result is discarded, so this line looks like a no-op.
                    writer.dialect.lineterminator.replace('\n', '')
                    writer.writerow(row)

            # NOTE(review): stop_event is not defined in this method;
            # presumably a module-level threading.Event - confirm.
            stop_event.set()
            document = PDFDocument
            document = self.document_t
            # NOTE(review): the file opened here is never closed explicitly.
            pf = PdfFileReader(BytesIO(open(self.pdf_path, 'rb').read()))

            # ENCRYPTION
            if self.parser.doc.encryption is not None:
                csv_row.insert(4, [self.csv_header[4], 'ENCRYPTED'])
                csv_row.insert(5, [self.csv_header[5], 'ENCRYPTED'])
            else:
                csv_row.insert(4, [self.csv_header[4], 'FALSE'])
                csv_row.insert(5, [self.csv_header[5], 'NA'])
        except Exception as e:
            # NOTE(review): if the failure happened before `document` was
            # assigned, the next section raises NameError, which its own
            # handler then records in columns 6 and 7.
            csv_row.insert(4, [self.csv_header[4], 'FAILED: ' + e.__str__()])
            csv_row.insert(5, [self.csv_header[5], 'NA'])
            exit_call = e.__str__() + ' document failed!!'
            print(exit_call)
            pass

        page_count = 0
        # Tagged flag and page count (columns 6 and 7)
        try:
            pages = PDFPage.get_pages(document)
            if not document.is_extractable:
                raise PDFTextExtractionNotAllowed
            rsrcmgr = PDFResourceManager()
            laparams = LAParams()
            page_no = 0
            istagged = 'FALSE'
            try:
                # A truthy 'MarkInfo' catalog entry marks the PDF as tagged;
                # a missing key raises and leaves the flag at 'FALSE'.
                if document.catalog['MarkInfo']:
                    istagged = 'TRUE'
            except Exception as e:
                exit_call = e.__str__() + ' tagged info failed!!'
                print(exit_call)
            page_count = resolve1(document.catalog['Pages'])['Count']
            csv_row.insert(6, [self.csv_header[6], istagged])
            csv_row.insert(7, [self.csv_header[7], page_count])
        except Exception as e:
            csv_row.insert(6, [self.csv_header[6], 'IsTagged: ' + e.__str__()])
            csv_row.insert(7,
                           [self.csv_header[7], 'Page Count: ' + e.__str__()])
            exit_call = e.__str__() + ' tagged info failed!!'
            print(exit_call)
        # Table of contents / outlines (column 8)
        try:
            if pf.outlines:
                csv_row.insert(8, [self.csv_header[8], 'TRUE'])
                '''pdf_path_toc = self.document_folder + pdf_name + '_toc.txt'
                places_list = pf.outlines

                with open(pdf_path_toc, 'w') as filehandle:
                    filehandle.writelines("%s\n" % place for place in places_list)
                filehandle.close()'''
            else:
                csv_row.insert(8, [self.csv_header[8], 'FALSE'])
        except Exception as e:
            csv_row.insert(8,
                           [self.csv_header[8], 'TOC FAILED: ' + e.__str__()])
            exit_call = e.__str__() + ' toc info failed!!'
            print(exit_call)
        # Form flag and number of form fields (columns 9 and 10)
        try:
            if pf.getFields():
                csv_row.insert(9, [self.csv_header[9], 'TRUE'])
                csv_row.insert(10,
                               [self.csv_header[10],
                                pf.getFields().__len__()])
            else:
                csv_row.insert(9, [self.csv_header[9], 'FALSE'])
                csv_row.insert(10, [self.csv_header[10], 0])
        except Exception as e:
            csv_row.insert(9, [self.csv_header[9], 'FORMS: ' + e.__str__()])
            csv_row.insert(10, [self.csv_header[10], 'FIELDS: ' + e.__str__()])
            exit_call = e.__str__() + ' forms failed!!'
            print(exit_call)
        # Table detection is not implemented (column 11)
        csv_row.insert(11, [self.csv_header[11], 'NOT RUN'])
        write_clip = ''
        word_count = 0
        words_per_page = 0
        char_count = 0
        chars_per_word = 0
        image_count = 0
        # Text sample plus word/character counts; documents with 50 or more
        # pages are skipped.
        # TODO: write 3 page sample and word count
        try:
            if pf.getNumPages() < 50:
                for page in range(pf.getNumPages()):
                    p = pf.getPage(page)
                    text_clip = p.extractText().encode('UTF-8')
                    # NOTE(review): str() of a bytes object is its repr
                    # ("b'...'"), so [2:] drops the leading b' but keeps the
                    # closing quote, and newlines appear as escape sequences
                    # that the '\n' replace below will not match - confirm
                    # this is intended.
                    text_clip = BytesIO(text_clip).read().__str__()[2:]
                    count_clip = re.findall(r"[^\W_]+", text_clip,
                                            re.MULTILINE)
                    word_count += len(count_clip)
                    char_count += len(text_clip)
                    # NOTE(review): page indices 0-3 are the first four
                    # pages, while the TODO above mentions a 3 page sample.
                    if page <= 3:
                        write_clip += '[ PAGE ' + (page +
                                                   1).__str__() + ' START ] '
                        write_clip += text_clip.replace('\n', '').replace(
                            ',', ' ').replace('"', '')
                        write_clip += '[ PAGE ' + (page +
                                                   1).__str__() + ' END ]'
            else:
                write_clip = 'OVER 50 PAGES - SAMPLE SKIPPED'
        except Exception as e:
            # On failure the counters are overwritten with the error text,
            # which then ends up in the corresponding cells.
            exit_call = e.__str__() + ' :: TEXT sample failed!!'
            write_clip = exit_call
            word_count = exit_call
            char_count = exit_call
            print(exit_call)
        # Derived ratios; both stay 0 when their divisor is 0.
        # TODO: Words/chars per page
        try:
            if not word_count == 0:
                chars_per_word = char_count / word_count
            else:
                chars_per_word = 0
            if not page_count == 0:
                words_per_page = word_count / page_count
            else:
                words_per_page = 0
        except Exception as e:
            exit_call = e.__str__() + ' :: WORD METRICS failed!!'
            chars_per_word = exit_call
            words_per_page = exit_call
            print(exit_call)
        # Word count, character count, words per page and characters per
        # word (columns 12-15)
        # TODO: Add to row
        i = 12
        try:
            csv_row.insert(i, [self.csv_header[i], word_count.__str__()])
        except Exception as e:
            csv_row.insert(i,
                           [self.csv_header[i], 'WORD_COUNT: ' + e.__str__()])
        i = 13
        try:
            csv_row.insert(i, [self.csv_header[i], char_count.__str__()])
        except Exception as e:
            csv_row.insert(i,
                           [self.csv_header[i], 'CHAR_COUNT: ' + e.__str__()])
        i = 14
        try:
            csv_row.insert(i, [self.csv_header[i], words_per_page.__str__()])
        except Exception as e:
            csv_row.insert(i, [self.csv_header[i], 'WPP: ' + e.__str__()])
        i = 15
        try:
            csv_row.insert(i, [self.csv_header[i], chars_per_word.__str__()])
        except Exception as e:
            csv_row.insert(i, [self.csv_header[i], 'CPP: ' + e.__str__()])

        # Image extraction is disabled: the string literal below is inert,
        # so column 16 is not written here and image_count stays 0.
        # TODO: IMAGES
        i = 16
        '''try:
            pdfImages = Globals.base_folder + 'cli-tools\\pdfimages.exe'

            img_folder = self.document_folder + 'images\\'  # + pdf_name[:-4] + '\\'
            if not os.path.exists(img_folder):
                os.makedirs(img_folder)
            # cmd = pdfImages + ' -list ' + '\"' + pdf_path + '\"'
            # output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0].split(b'\n')
            # save images to disk
            cmd = pdfImages + ' -list \"' + self.pdf_path + '\" \"' + ' ' + '\"'
            # subprocess.Popen(cmd, stdout=subprocess.PIPE)
            os.chdir(img_folder)
            image_list = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0].split(b'\r\n')
            # os.remove(img_folder)
            # image_count = output.count('\n')
            image_count = image_list.__len__()
            if image_count > 2:
                # target = open(pdf_path_image, 'w')
                # target.write(image_list)
                # target.close()
                csv_row.insert(i, [self.csv_header[i], (image_count - 2).__str__()])
            elif image_count == 0:
                csv_row.insert(i, [self.csv_header[i], 0])
            else:
                csv_row.insert(i, [self.csv_header[i], 0])
        except Exception as e:
            csv_row.insert(i, [self.csv_header[i], e.__str__() + ' image info failed!!'])
            exit_call = e.__str__() + ' image info failed!!'
            print(exit_call)'''
        # Images per page as a percentage (column 17)
        # TODO: IMAGES per page
        i = 17
        percent_img_per_page = float
        try:
            # NOTE(review): this parses as `(not image_count == 0) or
            # page_count == 0`, so a zero page_count enters the division and
            # raises ZeroDivisionError (recorded as 'IMG: ...').  Probably
            # meant "image_count != 0 and page_count != 0" - confirm.
            if not image_count == 0 or page_count == 0:
                percent_img_per_page = (float(image_count) /
                                        float(page_count)) * 100
            else:
                percent_img_per_page = 0
            csv_row.insert(i, [self.csv_header[i], percent_img_per_page])
        except Exception as e:
            csv_row.insert(i, [self.csv_header[i], 'IMG: ' + e.__str__()])
        # OCR risk score from 0 to 5 (column 18): fewer words per page or
        # more images per page give a higher score.
        # TODO: OCR risk
        i = 18
        try:
            if words_per_page == 0 or percent_img_per_page > 3000:
                ocr_risk = 5
            elif words_per_page < 15 or percent_img_per_page > 2000:
                ocr_risk = 4
            elif words_per_page < 40 or percent_img_per_page > 1000:
                ocr_risk = 3
            elif words_per_page < 70 or percent_img_per_page > 425:
                ocr_risk = 2
            elif words_per_page < 80 or percent_img_per_page > 200:
                ocr_risk = 1
            else:
                ocr_risk = 0
            csv_row.insert(i, [self.csv_header[i], ocr_risk])
        except Exception as e:
            csv_row.insert(i, [self.csv_header[i], 'OCR: ' + e.__str__()])
        # Document info: author, creator, producer, subject, title
        # (columns 19-23).  `di` falls back to `pf` itself when reading
        # documentInfo raises.
        di = pf
        try:
            di = pf.documentInfo
        except Exception as e:
            exit_call = e.__str__() + ' :: DOCUMENT INFO LOAD failed!!'
            print(exit_call)

        # Document info
        # NOTE(review): the values below are stored as UTF-8 encoded bytes,
        # which csv.writer would presumably render as "b'...'" - confirm.
        # When `di` is falsy, columns 19-23 are not inserted at all.
        if di:
            # Author
            try:
                i = 19
                if di.author:
                    csv_row.insert(
                        i, [self.csv_header[i],
                            di.author.encode('UTF-8')])
                else:
                    csv_row.insert(i, [self.csv_header[i], 'NULL'])
            except Exception as e:
                csv_row.insert(i,
                               [self.csv_header[i], 'AUTHOR: ' + e.__str__()])
                exit_call = e.__str__() + ' doc info failed!!'
                print(exit_call)
            # Creator
            # NOTE(review): this handler and the three after it print
            # `exit_call` without reassigning it, so they print a stale
            # message from an earlier step (or raise NameError if none was
            # ever set).
            try:
                i = 20
                if di.creator:
                    csv_row.insert(
                        i, [self.csv_header[i],
                            di.creator.encode('UTF-8')])
                else:
                    csv_row.insert(i, [self.csv_header[i], 'NULL'])
            except Exception as e:
                csv_row.insert(i,
                               [self.csv_header[i], 'CREATOR: ' + e.__str__()])
                print(exit_call)
                print('#5.1')
            # Producer
            try:
                i = 21
                if di.producer:
                    csv_row.insert(
                        i, [self.csv_header[i],
                            di.producer.encode('UTF-8')])
                else:
                    csv_row.insert(i, [self.csv_header[i], 'NULL'])
            except Exception as e:
                csv_row.insert(
                    i, [self.csv_header[i], 'PRODUCER: ' + e.__str__()])
                print(exit_call)
            # Subject
            try:
                i = 22
                if di.subject:
                    csv_row.insert(
                        i, [self.csv_header[i],
                            di.subject.encode('UTF-8')])
                else:
                    csv_row.insert(i, [self.csv_header[i], 'NULL'])
            except Exception as e:
                csv_row.insert(i,
                               [self.csv_header[i], 'SUBJECT: ' + e.__str__()])
                print(exit_call)
            # Title
            try:
                i = 23
                if di.title:
                    csv_row.insert(
                        i, [self.csv_header[i],
                            di.title.encode('UTF-8')])
                else:
                    csv_row.insert(i, [self.csv_header[i], 'NULL'])
            except Exception as e:
                csv_row.insert(i,
                               [self.csv_header[i], 'TITLE: ' + e.__str__()])
                print(exit_call)
        # Document clip: the text sample gathered above (column 24)
        i = 24
        try:
            csv_row.insert(i, [self.csv_header[i], write_clip])
        except Exception as e:
            csv_row.insert(i, [self.csv_header[i], e.__str__()])
        # Write results: only the value (second element) of each pair is
        # written to the report.
        row = []
        for i in range(csv_row.__len__()):
            row.append(csv_row[i][1])
        report_path = self.report_folder + self.report_name
        # COMPLETE WRITE
        with open(report_path, 'a', encoding='utf8', newline='') as csv_file:
            writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
            # NOTE(review): result discarded; this line looks like a no-op.
            writer.dialect.lineterminator.replace('\n', '')
            writer.writerow(row)
        # csv_file.close()
        # Release the input handle and delete the PDF from disk.
        fp.close()
        os.remove(self.pdf_path)

        # Log close
        msg = (' >>>> PDF complete:[' + self.url + '] ' +
               self.line_count.__str__() + ' ' +
               (datetime.datetime.now().__str__()[:-7]))
        print(msg)
        utils.logline(self.log, msg)