예제 #1
0
def create_xlsx(pdf_path, api_key):
    """Convert the PDF at ``pdf_path`` to an Excel file named 'output'.

    Problems are reported on stdout instead of being raised: a missing
    PDF prints "PDF Path is Invalid" and an API exception prints
    "API Key was Invalid".
    """
    try:
        pdftables_api.Client(api_key).xlsx(pdf_path, 'output')
    except FileNotFoundError:
        print("PDF Path is Invalid")
    except pdftables_api.pdftables_api.APIException:
        print("API Key was Invalid")
def convert_PDFtoExcel(a, b):
    """Convert the PDF ``a`` into the Excel file ``b`` and return ``b``.

    Conversion errors are reported on stdout and do not propagate, so
    ``b`` is returned whether or not the conversion succeeded.
    """
    try:
        conversion = api.Client('**********')
        conversion.xlsx_multiple(a, b)
    # Catch Exception instead of using a bare ``except`` so that
    # KeyboardInterrupt and SystemExit still terminate the program, and
    # include the cause so failures can be diagnosed.
    except Exception as exc:
        print("Error - While converting Source file -" + str(a) +
              " from pdf to excel: " + str(exc))
    return b
예제 #3
0
def convert_pdf_to_excel(pdf_path):
    """Convert ``pdf_path`` to Excel and return the output directory.

    The workbook is written under "./temp" using the base name
    "temporary". Note that the directory, not the workbook path, is
    what gets returned.
    """
    # No file-name parsing is done here: the output name is fixed, and
    # splitting the input name on "." would fail for names without a dot.
    excelfile_path = "./temp"
    c = pdftables_api.Client(API_KEY)
    c.xlsx(pdf_path, excelfile_path + "/temporary")

    return excelfile_path
예제 #4
0
def convert_pdf_to_excel(pdf_path):
    """Convert ``pdf_path`` to Excel and return the output directory.

    The workbook is written under the fixed extractor temp directory
    using the base name "temporary". The directory, not the workbook
    path, is what gets returned.
    """
    # No file-name parsing is done here: the output name is fixed, and
    # splitting the input name on "." would fail for names without a dot.
    excelfile_path = "/home/srinidhi/angular/extractor/temp"
    c = pdftables_api.Client(API_KEY)
    c.xlsx(pdf_path, excelfile_path + "/temporary")
    print(excelfile_path, "File Created")
    return excelfile_path
예제 #5
0
def convertpdf_xml(request):
    """Convert the fixed static PDF to XML and return it as a download.

    ``request`` is accepted but not read. The converted file is written
    to the static folder and then served as an attachment named
    "PDFdata_docx.xml".
    """
    source_pdf = "./static/pdf/pdf.pdf"
    xml_path = 'static/datascience/convertpdf/PDFdata_docx.xml'
    pdftables_api.Client('sx112tn9r25e').xml(source_pdf, xml_path)

    response = HttpResponse(
        FileWrapper(open(xml_path)),
        content_type="text/xml; charset=utf-8",
    )
    attachment_name = "PDFdata_docx.xml"
    response['Content-Disposition'] = "attachment; filename=%s" % attachment_name
    return response
예제 #6
0
def convertpdf_csv(request):
    """Convert the fixed static PDF to CSV and return it as a download.

    ``request`` is accepted but not read. The converted file is written
    to the static folder and then served as an attachment named
    "PDFdata_csv.csv".
    """
    source_pdf = "./static/pdf/pdf.pdf"
    csv_path = 'static/datascience/convertpdf/PDFdata_csv.csv'
    pdftables_api.Client('sx112tn9r25e').csv(source_pdf, csv_path)

    response = HttpResponse(
        FileWrapper(open(csv_path)),
        content_type='text/csv',
    )
    attachment_name = "PDFdata_csv.csv"
    response['Content-Disposition'] = "attachment; filename=%s" % attachment_name
    return response
def convert_pdfFiles_to_xlsx(src):
    """Return the ``.xlsx`` path corresponding to each PDF found in ``src``.

    The PDF list is printed first. The actual conversion call is
    disabled, so no file is uploaded or written; a client is still
    constructed once per PDF.
    """
    pdfFiles = glob.glob(os.path.join(src, "*.pdf"))
    print("PDF Files:", pdfFiles)

    ExcelFiles = []
    for pdf in pdfFiles:
        c = pdftables_api.Client(my_pdftables_api_key)
        # Conversion is disabled: c.xlsx(pdf, <xlsx path>) is not called.
        stem = pdf[:len(pdf) - 4]  # drop the trailing ".pdf"
        ExcelFiles.append(stem + '.xlsx')
    return ExcelFiles
def convertToCSV(key, inputFolder, outputFolder):
    """Convert every PDF in ``inputFolder`` to CSV inside ``outputFolder``.

    Each output keeps the PDF's name without its ".pdf" suffix. Any
    error (including a missing ``pdftables_api`` package) is printed
    rather than raised, and the function always returns ``None``.
    """
    try:
        import pdftables_api

        # Make a list of all PDF files in input folder
        paths = glob.glob(inputFolder + '/*.pdf')
        print("CSVs count:", len(paths))

        if not paths:
            print('Empty or wrong input folder')
            return

        # One client is enough for the whole batch.
        c = pdftables_api.Client(key)
        for path in paths:
            # Name is output folder plus name of file minus last 4 characters '.pdf'
            name = outputFolder + path[len(inputFolder):-4]
            c.csv(path, name)
            print(name)

    except Exception as e:
        print(e)
예제 #9
0
import pdftables_api
import os

dir = raw_input("Insert folder path: ")

for entry in os.listdir(dir):
	if entry.endswith(".pdf"):
	
		a = os.path.join(dir, entry)
		b = os.path.join(dir, entry)
		d = b.split(".pdf")[0]
		
		c = pdftables_api.Client('5d8bp4bt79zt')
		
		c.xlsx(a, d + ".xlsx")

		print '"'+ entry +'"' , "Converted Successfully"
		
		continue
	else:
		continue
		
print "Done!"
예제 #10
0
import pdftables_api
c = pdftables_api.Client('ctslevg3totv')

# Districts 4 through 19; file names use a zero-padded two-digit number.
for district in range(4, 20):
    suffix = str(district).zfill(2)
    c.xlsx('district' + suffix + '.pdf', 'output' + suffix + '.xlsx')

  
예제 #11
0
def pdfToExcel(d):
    """Convert ``<d>.pdf`` from the project's pdf folder into ``<d>.csv``.

    Despite the function's name, the output is a CSV file: the
    conversion is done with ``Client.csv``.
    """
    c = pdftables_api.Client('wa9l2jbtqv2t')
    # Build the paths by real concatenation. Wrapping the whole expression
    # in quotes would pass the expression text itself and ignore ``d``.
    c.csv("E:/4-1/Final Project/pdf/" + d + ".pdf",
          "E:/4-1/Final Project/csv/" + d + ".csv")
예제 #12
0
# Note from the author: the package could not be used from PyCharm, but it
# works when run from IDLE.

import os

import pdftables_api

# Work from the folder that holds the PDF (FOLDER_PATH_GOES_HERE).
os.chdir('/Users/jzalmano/Documents/')

c = pdftables_api.Client('my_personal_API_code')

# 'PDF_NAME_GOES_HERE.pdf' is the PDF to convert; 'output' is the name
# given to the resulting Excel spreadsheet. Change both as needed.
c.xlsx('PDF_NAME_GOES_HERE.pdf', 'output')
예제 #13
0
import pdftables_api
c = pdftables_api.Client('e3j22hhxx0ic')

# Districts 1 through 4; numbers are zero-padded to two digits.
for district in range(1, 5):
    suffix = '%02d' % district
    c.xlsx('district' + suffix + '.pdf', 'output' + suffix + '.xlsx')
예제 #14
0
"""
import os
import sys
base_path = tmp_global_obj["basepath"]
cur_path = base_path + 'modules' + os.sep + 'PDF2XLSX' + os.sep + 'libs' + os.sep
sys.path.append(cur_path)

import requests
import random
import pdftables_api
"""
    Get the module that was invoked
"""
module = GetParams("module")

if module == "pdftables":
    pdf_file = GetParams("pdf")
    path = GetParams("path")
    api_key = GetParams("apikey")

    # Make sure the output path carries the Excel extension.
    path = path if path.endswith(".xlsx") else path + ".xlsx"

    try:
        c = pdftables_api.Client(api_key)
        c.xlsx(pdf_file, path)
    except Exception as e:
        # Report the failure through the host's helper, then propagate it.
        PrintException()
        raise e
import pdftables_api
c = pdftables_api.Client('emrssrlm10zx')

# Districts 1 through 45; numbers are zero-padded to two digits.
for district in range(1, 46):
    suffix = '{:02d}'.format(district)
    c.xlsx('district' + suffix + '.pdf', 'output' + suffix + '.xlsx')
import pdftables_api
c = pdftables_api.Client('ywjdw1ch1az9')

# Districts 22 through 25; the two-digit padding only matters below 10.
for district in range(22, 26):
    suffix = str(district).zfill(2)
    c.xlsx('district' + suffix + '.pdf', 'output' + suffix + '.xlsx')
예제 #17
0
import os
import pdftables_api


client = pdftables_api.Client('14tn2liiiei7')

# Convert every PDF in the current directory to CSV. ``num`` is the entry's
# position in the directory listing, so output numbers may have gaps.
converted = 0
for num, file in enumerate(os.listdir()):
    # Match the real ".pdf" extension; other entries are simply skipped
    # instead of aborting the whole run.
    if file.endswith('.pdf'):
        client.csv(file, 'fromsas_{}'.format(num))
        converted += 1

# Fail only when the directory held no PDF at all, and raise a real
# exception object (raising a plain string is itself a TypeError).
if not converted:
    raise FileNotFoundError("There are no pdf files.")
import pdftables_api
c = pdftables_api.Client('2f8d5j4y33tv')

# Districts 9 through 17; numbers are zero-padded to two digits.
for district in range(9, 18):
    suffix = format(district, '02d')
    c.xlsx('district' + suffix + '.pdf', 'output' + suffix + '.xlsx')
예제 #19
0
import pandas as pd
import pdftables_api
import os
# Make sure the download ("test") and output ("excel") folders exist.
for folder in ('test', 'excel'):
    if not os.path.exists(folder):
        os.makedirs(folder)
from parser_pdf import download_pdf
print("Файлы в формате pdf можете найти в папке test")
print("Файлы в формате xlsx можете найти в папке excel")
absdwdirname = os.path.abspath('test')

# The first line of settings.txt holds the API key.
with open("settings.txt", 'r') as file:
    f = file.read().split("\n")
key = f[0]
if not key:
    # Empty first line: fall back to the built-in key.
    key = '00xa7kk2eja9'
print("ключ", key)
c = pdftables_api.Client(key)

# Download a PDF for every value in the 'ИНН' column whose text form is
# 10 or 12 characters long.
dfs = pd.read_excel('inn.xlsx')
for i in dfs['ИНН'].tolist():
    if len(str(i)) in (10, 12):
        download_pdf(i)

print("Завершено скачивание pdf файлов...Начинаем преобразование в xlsx")

# Convert everything in the download folder; the output name is the input
# name minus its last four characters.
for filename in os.listdir(absdwdirname):
    source = os.path.join(absdwdirname, filename)
    target = os.path.join('excel', filename[:len(filename) - 4])
    c.xlsx(source, target)

print("Процесс закончен!")
import pdftables_api
c = pdftables_api.Client('bfgu7mbg51q8')

# Districts 17 through 27; the two-digit padding only matters below 10.
for district in range(17, 28):
    suffix = '%02d' % district
    c.xlsx('district' + suffix + '.pdf', 'output' + suffix + '.xlsx')

  
예제 #21
0
#Code is for conversion of pdf file to excel file
import pdftables_api                        #importing module given by pdftables
import os
#Enter the path of the folder where all pdf file are stored
a=raw_input("Enter the path of the folder")
try:
    c = pdftables_api.Client('zmll391gneyq')
    file_path = a+"\\"
    for file in os.listdir(file_path):
#If statement to find all the file in the folder with .pdf format
        if file.endswith(".pdf"):
            c.xlsx(os.path.join(file_path,file), file+'.xlsx')
#Exception handling: if the attempt above fails for any reason (e.g. an installation or API-key problem), retry with another key
except Exception:
    try:
        c = pdftables_api.Client('mu5pqlmbcxzh')
        file_path = a+"\\"
        for file in os.listdir(file_path):
            if file.endswith(".pdf"):
                c.xlsx(os.path.join(file_path, file), file + '.xlsx')
    except Exception:
        try:
            c = pdftables_api.Client('rk8wn6n8l98b')
            file_path = a+"\\"
            for file in os.listdir(file_path):
                if file.endswith(".pdf"):
                    c.xlsx(os.path.join(file_path, file), file + '.xlsx')
        except Exception:
            try:
                c = pdftables_api.Client('vchmq35pbbjy')
                file_path = a+"\\"
예제 #22
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import time
import sys
sys.path.insert(0, 'libs')
import pdftables_api
import xlrd
import xlwt

c = pdftables_api.Client('fc9wow7u2a9c')


def crear_xlsx(banco):
    """Convert ``<banco>.pdf`` into ``xlsx/<banco>_output.xlsx``."""
    source_pdf = banco + '.pdf'
    target_xlsx = 'xlsx/' + banco + '_output.xlsx'
    c.xlsx(source_pdf, target_xlsx)


def generar_banaci(banco):
    """Return the formatted data for ``banco``, or ``None`` on any failure."""
    try:
        return formatear_banaci(banco)
    # Best-effort by design, but catch Exception rather than everything so
    # KeyboardInterrupt and SystemExit are not swallowed.
    except Exception:
        return None


def formatear_banaci(banco):
    try:
        book = xlrd.open_workbook('xlsx/' + banco + '_output.xlsx')
        sheet = book.sheet_by_index(2)
        var = {}
        for col in range(1, 10):
            ind = limpiar_banaci(sheet.cell_value(1, col)).lower()
import pdftables_api
c = pdftables_api.Client('eqnu7qshno1h')

# Districts 3 through 37; numbers are zero-padded to two digits.
for district in range(3, 38):
    suffix = '{:02d}'.format(district)
    c.xlsx('district' + suffix + '.pdf', 'output' + suffix + '.xlsx')
예제 #24
0
import pdftables_api
c = pdftables_api.Client('rm20ic2hu1j1')

# Districts 1 through 8; numbers are zero-padded to two digits.
for district in range(1, 9):
    suffix = format(district, '02d')
    c.xlsx('district' + suffix + '.pdf', 'output' + suffix + '.xlsx')
예제 #25
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 20 04:53:02 2020

@author: snowden
"""

import pdftables_api

# The client has to exist before the first conversion call.
c = pdftables_api.Client('83ile2qas3u3')

c.csv('/home/snowden/Downloads/1034-DEPENSES-2016.pdf',
      'pgt-depenses-2016.csv')
#replace c.csv with c.xlsx to convert to XLSX
#replace c.csv with c.xml to convert to XML
#replace c.csv with c.html to convert to HTML

# Placeholder entries: fill in the paths of the PDFs to convert.
listOfPDF = ['', '']

# Outputs are numbered from 1: file-1, file-2, ...
for i, file in enumerate(listOfPDF, start=1):
    # Input path and output name are two separate arguments; only the
    # output name is formatted with the counter.
    c.csv(file, 'file-{}'.format(i))
예제 #26
0
import urllib.request
import pdftables_api
#fetching the pdf from the nsu site  
pdf_path = "http://www.northsouth.edu/newassets/images/5-240.AcademicCalendarSpring%202021.pdf"
def download_file(download_url, filename):
    """Download ``download_url`` and save it as ``<filename>.pdf``.

    The ".pdf" suffix is always appended to ``filename``. Errors from
    opening the URL or writing the file propagate to the caller.
    """
    # Context managers close the connection and the output file even when
    # reading or writing fails. The body is read before the file is opened
    # so a failed download does not leave an empty file behind.
    with urllib.request.urlopen(download_url) as response:
        payload = response.read()
    with open(filename + ".pdf", 'wb') as file:
        file.write(payload)
 

# Converter API: downloads the PDF as "Test.pdf", then converts it to XLSX
# using the output name 'output'.

download_file(pdf_path, "Test")
c = pdftables_api.Client('9v1fb38u4aah')
c.xlsx('Test.pdf', 'output')


#replace c.xlsx with c.csv to convert to CSV



# Configuring google standard csv events. 





# Google Calendar API: import the converted CSV file.
예제 #27
0
import pdftables_api, requests
import os

# https://pdftables.com/pdf-to-excel-api
# The API key is read from the API_KEY environment variable; os.getenv
# returns None when the variable is not set.
API_KEY = os.getenv('API_KEY')

# Module-level client built from that key.
convertor = pdftables_api.Client(API_KEY)


def remaining_conversions():
    """Return a sentence stating how many pages the API key can still convert.

    The count is fetched from the PDFTables "remaining" endpoint and
    parsed as an integer.
    """
    response = requests.get(
        f'https://pdftables.com/api/remaining?key={API_KEY}')
    pages_left = int(response.text)
    return f'{pages_left} pages available to convert'


def make_same_fileName(file_name):
    """Return the ``.xlsx`` file name matching the PDF at ``file_name``.

    Only the last "/"-separated path component is used, and its ``.pdf``
    extension (matched case-insensitively) is replaced by ``.xlsx``.

    Raises:
        ValueError: if the file name does not end in ``.pdf``.
    """
    base_name = file_name.rsplit('/', 1)[-1]
    # Split on the LAST dot only: dotted names such as "my.report.pdf" keep
    # their whole stem, and "pdf" appearing anywhere but the extension
    # ("pdf.txt", "a.pdf.bak") is not mistaken for a PDF.
    stem, dot, extension = base_name.rpartition('.')
    if not dot or extension.lower() != 'pdf':
        raise ValueError("file extension must be pdf")
    return stem + '.xlsx'


def convert_pdf_to_xlsl(pdf_path, xlsx_path):
    """Convert ``pdf_path`` to XLSX and return the generated file name.

    The output path is ``xlsx_path`` directly followed by the generated
    name (no separator is inserted). After converting, the remaining
    page quota is printed.
    """
    name = make_same_fileName(pdf_path)
    convertor.xlsx(pdf_path, xlsx_path + name)
    quota_message = remaining_conversions()
    print(f'Done. For this api_key remains {quota_message}')
    return name

import pdftables_api
import os

c = pdftables_api.Client('API_KEYS')

file_path = "D:/RnE/xmltable/"

# Convert each PDF in the folder to XML. The output keeps the full PDF name
# plus ".xml" and is given without a directory.
pdf_names = [entry for entry in os.listdir(file_path) if entry.endswith(".pdf")]
for file in pdf_names:
    c.xml(os.path.join(file_path, file), file + '.xml')
예제 #29
0
import sys
# print(sys.argv[1])
import os
import pdftables_api  #IMPORTANT LIBRARY
import time
print("Started Computer Vision To Analyaze PDF Files")
time.sleep(2)

# Produce an Excel and an XML file for every PDF in the current directory.
dirs = os.listdir()
for file in dirs:
    if not file.endswith('.pdf'):
        continue
    print("Analyzing : ", file)
    c = pdftables_api.Client('hksh1ytt3b6o')  # API secret key
    print("Generating Excel For : ", file)
    time.sleep(2)
    c.xlsx(file, file + '.xlsx')
    print("Generating XML For DataScience Enthusiasts : ", file)
    time.sleep(2)
    c.xml(file, file + '.xml')
    # CSV and HTML output are available the same way through c.csv and
    # c.html; they are not generated here.

# Gather the generated files into ./files and zip them up.
os.system('mkdir files')
os.system("mv *.xlsx ./files/ && mv *.xml ./files/")
print("Making Final Zip File ")
time.sleep(3)
os.system("cd ./files && powershell Compress-Archive * final.zip")
print("Zip FIle Created With Name : final.zip")

# Ask what to do with the archive; the answer is parsed as an integer.
print("[1]- To Open Zip File \n ")
print("[2]- To Email Zip File \n ")
answer = input("Please Choose An Option Given Above [Enter Integer Only] : \n")
option = int(answer)
예제 #30
0
import pdftables_api

c = pdftables_api.Client('62cjalkbkgj6')
# Convert the PDF to CSV; the output name is given without an extension.
c.csv('pdf3-pages-5-6.pdf', 'output')
#replace c.csv with c.xlsx to convert to XLSX
#replace c.csv with c.xml to convert to XML
#replace c.csv with c.html to convert to HTML