-
Notifications
You must be signed in to change notification settings - Fork 0
/
xls2pgsql.py
executable file
·161 lines (135 loc) · 4.1 KB
/
xls2pgsql.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/usr/bin/python
###############################
# Script to export xls or xlsx to PostgreSQL
# Angel Joyce Torres Ramirez
# joys.tower@gmail.com
# require xlrd psycopg2
# free to use
# usage:
# xls2pgsql.py -x <xlsfile> -e <xls_encode> -t <table to export> -h <host> -p <port> -u <user> -w <password> -d <database>
#
# the new table created all columns created as varchar
###############################
import os, sys, getopt
import unicodedata
PG_CONN_STRING = ""
PG_CONN=None
XLS_FILE=None
XLS_ENCODE=None
TABLE=None
#Comand Usage
def printCmdUsage():
print 'xls2pgsql.py -x <xlsfile> -e <xls_encode> -t <table to export> -h <host> -p <port> -u <user> -w <password> -d <database>'
sys.exit(2)
#Conection definitions##########
def conect2PG():
global PG_CONN_STRING
global PG_CONN
try:
PG_CONN=psycopg2.connect(PG_CONN_STRING)
except:
print "Can't connect to the database"
sys.exit(2)
def closeConn():
global PG_CONN
if PG_CONN is not None:
PG_CONN.close()
################################
#Read XLS##########
def remove_accents(input_str):
nfkd_form = unicodedata.normalize('NFKD', input_str)
return u"".join([c for c in nfkd_form if not unicodedata.combining(c)])
def xls2pg():
from xlrd.sheet import ctype_text
global XLS_FILE
global XLS_ENCODE
global PG_CONN
bk = None
print "Reading XLS"
if( XLS_ENCODE == None):
bk = xlrd.open_workbook(XLS_FILE)
else:
bk = xlrd.open_workbook(XLS_FILE, encoding_override=XLS_ENCODE)
print "Creating "+TABLE+" table"
sheet = bk.sheet_by_index(0)
row = sheet.row(0)
create_table = "CREATE TABLE "+TABLE+"("
for idx, cell_obj in enumerate(row):
#cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
#print cell_type_str
col = remove_accents(cell_obj.value)
col = col.replace(" ","_").lower()
create_table = create_table + col + " varchar,"
create_table = create_table[0:-1] + ");"
num_cols = sheet.ncols
print "Inserting data"
try:
cur = PG_CONN.cursor()
cur.execute( create_table )
for row_idx in range(1, sheet.nrows):
insert = "INSERT INTO "+TABLE+" VALUES("
row = sheet.row(row_idx)
for idx, cell_obj in enumerate(row):
cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
if cell_type_str == "text":
cell = unicode(cell_obj.value)
cell = cell.encode('UTF-8')
cell = cell.replace("'","''")
else:
cell = str(cell_obj.value)
insert = insert + "'"+ cell +"',"
insert = insert[0:-1] + ");"
cur.execute( insert )
if( (row_idx % 100) == 0 ):
print ("."),
print "Commiting data"
cur.close()
PG_CONN.commit()
except (Exception, psycopg2.DatabaseError) as error:
print(error)
finally:
closeConn()
################################
#Main
def readOptions():
try:
opts, args = getopt.getopt(sys.argv[1:],"x:e::t:h:p::u:w::d:")
except getopt.GetoptError:
printCmdUsage("1")
global XLS_ENCODE
global XLS_FILE
global TABLE
conn_options = ('-h', '-d', '-p', '-u', '-w')
connString = ""
for opt, arg in opts:
if opt == '-x':
XLS_FILE = arg
elif opt == '-e':
XLS_ENCODE = arg
elif opt == '-t':
TABLE = arg
elif opt == '-h':
connString = connString + " host='" + arg +"'"
elif opt == '-p':
connString = connString + " port='" + arg +"'"
elif opt == '-u':
connString = connString + " user='" + arg +"'"
elif opt == '-w':
connString = connString + " password='" + arg +"'"
elif opt == '-d':
connString = connString + " dbname='" + arg +"'"
if(XLS_FILE == ""):
printCmdUsage()
if(TABLE == ""):
printCmdUsage()
if(connString == ""):
printCmdUsage()
global PG_CONN_STRING
PG_CONN_STRING = connString
if __name__ == "__main__":
readOptions()
import psycopg2
conect2PG()
import xlrd
xls2pg()
closeConn()