def readTablePdf(fileName="demoData.pdf"):
    """Run an interactive console menu for extracting tables from a PDF.

    The menu is printed, one option number is read from standard input and
    the matching tabula call is made on ``fileName``. This repeats until an
    exit choice is entered.

    Options:
        1: print the result of ``tabula.read_pdf(fileName)``.
        2: print the result of ``read_pdf`` with ``output_format="json"``.
        3: print the result of ``read_pdf`` (the same call as option 1).
        4: print the result of ``read_pdf`` with ``multiple_tables=True``
           and ``pages="all"``.
        5: ask for an output file name and convert the PDF to JSON.
        6: ask for an output file name and convert the PDF to CSV.
        7: print the result of ``tabula.environment_info()``.
        0, or any other number: leave the menu.

    Args:
        fileName: Path of the PDF to read. Defaults to ``"demoData.pdf"``.

    Returns:
        None. Results are printed or written to the chosen output file.

    Any exception raised while handling a choice is caught, reported on
    standard output and ends the menu. Non-numeric menu input is not treated
    as such a failure: a notice is printed and the menu is shown again.
    """
    try:
        while True:
            print(
                "\n 1: Print DataFrame\n 2: Print JSON\n 3: Print Single Table\n 4: Print Multiple Tables "
            )
            print(
                " 5: Save to JSON File\n 6: Save to CSV File\n 7: Tabula Info\n 0: EXIT"
            )
            try:
                choice = int(input("Choose one of the options from above: "))
            except ValueError:
                # A typo at the prompt has nothing to do with the PDF, so it
                # must not be reported as a file error or end the session.
                print("Invalid option: please enter one of the numbers listed above.")
                continue

            if choice == 1:
                # Read the PDF with tabula's default settings.
                df = tabula.read_pdf(fileName)
                print("DATA: \n", df)
            elif choice == 2:
                # Same read, but ask tabula for JSON output.
                df = tabula.read_pdf(fileName, output_format="json")
                print("JSON DATA: \n", df)
            elif choice == 3:
                # NOTE(review): this is the same call as option 1; a page
                # selection could be passed to target one specific table.
                df = tabula.read_pdf(fileName)
                print("SINGLE TABLE DATA: \n", df)
            elif choice == 4:
                # Read the tables from every page.
                df = tabula.read_pdf(fileName,
                                     multiple_tables=True,
                                     pages="all")
                print("ALL TABLES DATA: \n", df)
            elif choice == 5:
                # Save the data of all pages into a JSON file of the given name.
                output = input("ENTER OUTPUT FILE NAME")
                tabula.convert_into(fileName,
                                    output,
                                    output_format="json",
                                    multiple_tables=True,
                                    pages='all')
            elif choice == 6:
                # Save the data of all pages into a CSV file of the given name.
                output = input("ENTER OUTPUT FILE NAME")
                tabula.convert_into(fileName,
                                    output,
                                    output_format="csv",
                                    multiple_tables=True,
                                    pages='all')
            elif choice == 7:
                # Tabula information
                print("TABULA INFORMATION:\n", tabula.environment_info())
            else:
                # 0 (EXIT) and every unlisted number leave the menu.
                break

    except Exception as ex:
        print("Exception in opening file: ", ex)
Esempio n. 2
0
 def test_environment_info(self):
     # Call tabula's environment report and keep whatever it hands back.
     reported = tabula.environment_info()
     # The call is expected to produce no value, i.e. compare equal to None.
     self.assertEqual(reported, None)
Esempio n. 3
0
 def test_environment_info(self):
     # Call tabula's environment report and keep whatever it hands back.
     reported = tabula.environment_info()
     # The call is expected to produce no value, i.e. compare equal to None.
     self.assertEqual(reported, None)
Esempio n. 4
0
import tabula
# Check the environment through tabula-py: it reports the Python version,
# the Java version and the OS environment.
tabula.environment_info()

# Raw strings keep the Windows backslashes literal. In ordinary string
# literals, sequences such as "\S" or "\A" are invalid escapes that Python
# warns about and that are slated to become errors.
pdf_path = r"G:\Shared drives\Agrigate\Data Analytics and Data Science\Research\Industry specs and knowledge\Fresh-Food-Trade-SA-2020.pdf"
csv_path = r"G:\Shared drives\Agrigate\Data Analytics and Data Science\Research\Industry specs and knowledge\Fresh-Food-Trade-SA-2020.csv"

# Convert all pages of the PDF into a CSV file.
tabula.convert_into(
    pdf_path,
    csv_path,
    pages="all",
    output_format="csv",
    stream=True,
)
Esempio n. 5
0
# # runtime: python37
# handlers:
# - url: /.*
#   secure: always
#   script: auto

from flask import Flask, render_template, request
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
from pandas import DataFrame

import tabula

# Keep the return value of tabula's environment report and print it under
# a "TEST" marker line.
test = tabula.environment_info()
print("TEST", test, sep="\n")

import pygsheets

# Authorize pygsheets with the key file passed as service_file.
client = pygsheets.authorize(service_file="credentials.json")
# Open the spreadsheet named "COVID19_japan".
sheet = client.open("COVID19_japan")
# Index 0 of the spreadsheet -- presumably its first worksheet; confirm
# against the pygsheets documentation.
wks = sheet[0]

import pdfplumber
from io import BytesIO

import datetime
import json