forked from netfarm/archiver
-
Notifications
You must be signed in to change notification settings - Fork 0
/
backend_pgsql.py
331 lines (284 loc) · 9.8 KB
/
backend_pgsql.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
#!/usr/bin/env python
# -*- Mode: Python; tab-width: 4 -*-
#
# Netfarm Mail Archiver - release 2
#
# Copyright (C) 2005-2007 Gianluigi Tiesi <sherpya@netfarm.it>
# Copyright (C) 2005-2007 NetFarm S.r.l. [http://www.netfarm.it]
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
# ======================================================================
## @file backend_pgsql.py
## PostgreSQL Storage and Archive Backend
# Module docstring, assigned explicitly instead of using a leading string literal.
__doc__ = '''Netfarm Archiver - release 2.1.0 - PostgreSQL backend'''
# Release number of this backend module.
__version__ = '2.1.0'
# Only the Backend class is exported by a star-import of this module.
__all__ = [ 'Backend' ]
from archiver import *
from sys import exc_info
from time import asctime
from types import StringType
from base64 import encodestring
from psycopg2 import connect as db_connect
# SQL statement templates.
#
# Every template is completed with Python %-formatting, i.e. the values are
# pasted into the SQL text exactly as given.  String placeholders are wrapped
# in single quotes by the template itself, so string values must already be
# escaped (see sql_quote) before they are substituted.

# One row of the "mail" table.  mail_id, year and pid are produced by the
# database functions get_next_mail_id(), get_curr_year() and get_new_pid();
# media is always stored as -1.  Expects a dict with the keys message_id,
# from_login, from_domain, subject, mail_date, mail_size and attachment
# (mail_size and attachment are inserted unquoted).
mail_template = """
INSERT INTO mail (
mail_id,
year,
pid,
message_id,
from_login,
from_domain,
subject,
mail_date,
mail_size,
attachment,
media
) VALUES (
get_next_mail_id(),
get_curr_year(),
get_new_pid(),
'%(message_id)s',
'%(from_login)s',
'%(from_domain)s',
'%(subject)s',
'%(mail_date)s',
%(mail_size)s,
%(attachment)s,
-1
);
"""
# One row of the "recipient" table, tied to the current mail through
# get_curr_mail_id().  Expects a dict with the keys to_login and to_domain.
recipient_template = """
INSERT INTO recipient (
mail_id,
to_login,
to_domain
) VALUES (
get_curr_mail_id(),
'%(to_login)s',
'%(to_domain)s'
);
"""
# One row of the "authorized" table, tied to the current mail through
# get_curr_mail_id().  Takes a single positional value: the mailbox name.
authorized_template = """
INSERT INTO authorized (
mail_id,
mailbox
) VALUES (
get_curr_mail_id(),
'%s'
);
"""
# One row of the "mail_storage" table.  Expects a dict with the keys year and
# pid (formatted with %d, so they must be numbers) and mail (a string).
storage_template = """
INSERT INTO mail_storage (
year,
pid,
mail
) VALUES (
'%(year)d',
'%(pid)d',
'%(mail)s'
);
"""
##
def sql_quote(text):
    """sql_quote

    Makes a string safe to place between single quotes in a query built
    by hand: NULL chars are dropped, backslashes are doubled and single
    quotes are prefixed with a backslash.

    @param text: is the text that should be quoted
    @return: quoted string"""
    # The order is significant: backslashes have to be doubled before the
    # quote escape is added, otherwise the backslash introduced for a
    # quote would be doubled as well.
    substitutions = (
        ('\x00', ''),
        ("\\", "\\\\"),
        ("'", "\\'"),
    )
    for needle, replacement in substitutions:
        text = text.replace(needle, replacement)
    return text
def quote_dict(info):
    """quote_dict

    Escapes, in place, every plain string value of a dict with sql_quote.
    Only values that are instances of StringType are touched (under
    Python 2 that is str, so numbers, lists and unicode objects are left
    as they are); keys are never modified.

    @param info: the dict whose string values should be quoted; it is
                 modified in place
    @return: None"""
    # Iterate over a snapshot so that assigning into the dict while looping
    # is safe on every Python version.
    for key, value in list(info.items()):
        # isinstance also accepts subclasses of the string type, which an
        # exact type() comparison would silently skip.
        if isinstance(value, StringType):
            info[key] = sql_quote(value)
def format_msg(msg):
    """Formats an error message from pgsql backend

    The object is converted with str(); a text longer than 256 chars is
    cut at 256 chars and marked with a '...(message too long)' suffix.
    Surrounding whitespace is then stripped, the remaining lines are
    joined with ', ' and every tab is removed.

    @param msg: is the original object for error message
    @return: formatted message"""
    text = str(msg)
    if len(text) > 256:
        text = text[:256] + '...(message too long)'
    lines = text.strip().split('\n')
    return ', '.join(lines).replace('\t', '')
class BadConnectionString(Exception):
    """BadConnectionString

    Raised when the configured connection string cannot be used; the
    offending string is passed as the exception argument."""
class ConnectionError(Exception):
    """ConnectionError

    Raised when a connection to PGSQL cannot be established; the
    formatted driver message is passed as the exception argument."""
class Backend(BackendBase):
    """PGSQL Backend uses PostgreSQL database

    This backend can be used either as Storage or as Archive: the stage
    type passed to the constructor selects process_storage or
    process_archive as the processing entry point.

    The statements issued by process_archive and process_storage carry
    their values as bound parameters, so mail content is never pasted into
    the SQL text and the caller's data is never modified."""

    # Parameterised statements (psycopg2 positional "%s" placeholders).
    # mail_id, year and pid are generated by database-side functions.
    _MAIL_SQL = (
        "INSERT INTO mail ("
        " mail_id, year, pid, message_id, from_login, from_domain,"
        " subject, mail_date, mail_size, attachment, media"
        ") VALUES ("
        " get_next_mail_id(), get_curr_year(), get_new_pid(),"
        " %s, %s, %s, %s, %s, %s, %s, -1"
        ");\n"
    )
    _RECIPIENT_SQL = (
        "INSERT INTO recipient (mail_id, to_login, to_domain)"
        " VALUES (get_curr_mail_id(), %s, %s);\n"
    )
    _AUTHORIZED_SQL = (
        "INSERT INTO authorized (mail_id, mailbox)"
        " VALUES (get_curr_mail_id(), %s);\n"
    )
    _PID_SQL = "SELECT year, pid from mail_pid;"
    _STORAGE_SQL = (
        "INSERT INTO mail_storage (year, pid, mail) VALUES (%s, %s, %s);"
    )

    # Longest value stored in any text column written by this backend.
    _MAX_FIELD = 512

    def __init__(self, config, stage_type, ar_globals, prefix = None):
        """The constructor

        Reads the connection string of the stage from the configuration
        and tries to open a connection to pgsql.  A failed connection is
        not fatal here: it is logged by connect() and retried later by
        do_query() when autorecon is requested.

        @param config: configuration object; config.get(stage_type, 'dsn')
                       must return 'username:password:host:dbname'
        @param stage_type: name of the stage, also used to look up the
                           process_<stage_type> method
        @param ar_globals: dict providing the logging callable as 'LOG'
        @param prefix: log prefix; when None the default prefix is used,
                       self.process is resolved and the startup line is
                       logged (a non-None prefix is presumably passed by
                       code that reuses this class - TODO confirm)
        @raise StorageTypeNotSupported: no process_<stage_type> method
        @raise BadConnectionString: the dsn does not have four
                                    colon-separated fields"""
        self.config = config
        self.type = stage_type
        self.LOG = ar_globals['LOG']

        if prefix is None:
            self._prefix = 'PGSQL Backend: '
            self.process = getattr(self, 'process_' + self.type, None)
            if self.process is None:
                raise StorageTypeNotSupported(self.type)
        else:
            self._prefix = prefix

        try:
            dsn = self.config.get(self.type, 'dsn')
        except Exception:
            # The placeholder has no colons, so it is rejected just below
            # and ends up as the BadConnectionString argument.
            dsn = 'Missing connection string'

        if dsn.count(':') != 3:
            raise BadConnectionString(dsn)

        username, password, host, dbname = dsn.split(':')
        self.dsn = 'host=%s user=%s password=%s dbname=%s' % (host,
                                                              username,
                                                              password,
                                                              dbname)
        self.connection = None
        self.cursor = None
        try:
            self.connect()
        except Exception:
            # Deliberate best effort: connect() has already logged the
            # problem and the backend can reconnect when it is needed.
            pass

        if prefix is None:
            self.LOG(E_ALWAYS, self._prefix + '(%s) at %s' % (self.type, host))

    def close(self):
        """closes the cursor and the connection

        Errors raised while closing are ignored; both attributes are
        always reset to None so that no stale handle is left behind."""
        for name in ('cursor', 'connection'):
            handle = getattr(self, name, None)
            if handle is not None:
                try:
                    handle.close()
                except Exception:
                    pass  # nothing useful can be done with a broken handle
            setattr(self, name, None)

    def connect(self):
        """make a connection to pgsql

        Any previous cursor/connection is closed first.  The new
        connection is switched to isolation level 0 and a cursor is
        created from it.

        @raise ConnectionError: if the driver cannot connect; the argument
                                is the formatted driver message"""
        self.close()
        try:
            self.connection = db_connect(self.dsn)
        except Exception:
            ## We can work without the db connection and call it when needed
            error = format_msg(exc_info()[1])
            self.LOG(E_ERR, self._prefix + 'connection to database failed: ' + error)
            raise ConnectionError(error)

        self.connection.set_isolation_level(0)
        self.cursor = self.connection.cursor()
        self.LOG(E_TRACE, self._prefix + 'I\'ve got a cursor from the driver')

    def do_query(self, qs, fetch=False, autorecon=False, params=None):
        """execute a query

        Query -> reconnection -> Query

        @param qs: the query string
        @param fetch: if True the query must return a result
        @param autorecon: if a query fails a db reconnection is done and
                          the query is retried once
        @param params: optional sequence of values bound by the driver to
                       the %s placeholders of qs; when None the query
                       string is executed as it is
        @return: Boolean Status, data, and message"""
        try:
            if params is None:
                self.cursor.execute(qs)
            else:
                self.cursor.execute(qs, params)
            self.connection.commit()
            res = []
            if fetch:
                res = self.cursor.fetchone()
            return True, res, 'Ok'
        except Exception:
            # Remember the failure right away: the rollback attempt below
            # may raise as well and would otherwise replace it.
            etype, evalue = exc_info()[:2]
            try:
                self.connection.rollback()
            except Exception:
                self.LOG(E_ERR, self._prefix + 'rollback failed')
            self.LOG(E_ERR, self._prefix + 'query fails')

            if autorecon:
                self.LOG(E_ERR, self._prefix + 'Trying to reopen DB Connection')
                try:
                    self.connect()
                except Exception:
                    return False, [], 'Internal Server Error - Error reopening DB connection'
                # Retry once; autorecon is left off to avoid looping.
                return self.do_query(qs, fetch, params=params)

            msg = format_msg(evalue)
            self.LOG(E_ERR, self._prefix + 'Cannot execute query: ' + msg)
            self.LOG(E_ERR, self._prefix + 'the query was: ' + qs)
            return False, [], '%s: Internal Server Error' % etype

    def parse_recipients(self, recipients):
        """splits recipient addresses into quoted login/domain pairs

        The address is taken from index 1 of each entry.  An address that
        cannot be split on '@' is logged and used for both login and
        domain.  Values are cut to 512 chars and escaped with sql_quote,
        i.e. they are meant for queries built by string formatting.

        @param recipients: sequence of entries whose item [1] is the address
        @return: list of dicts with the keys to_login and to_domain"""
        result = []
        for recipient in recipients:
            address = recipient[1]
            try:
                dlog, ddom = address.split('@', 1)
            except Exception:
                self.LOG(E_ERR, self._prefix + 'Error parsing to/cc: ' + address)
                dlog = address
                ddom = address
            result.append({'to_login': sql_quote(dlog[:self._MAX_FIELD]),
                           'to_domain': sql_quote(ddom[:self._MAX_FIELD])})
        return result

    @staticmethod
    def _strip_nul(text):
        """removes NULL chars, which cannot be sent inside a string value"""
        return text.replace('\x00', '')

    def _split_address(self, address):
        """splits 'login@domain' into a cleaned (login, domain) tuple

        Raises ValueError when the address has no '@'."""
        login, domain = self._strip_nul(address).split('@', 1)
        return (login.strip()[:self._MAX_FIELD],
                domain.strip()[:self._MAX_FIELD])

    def process_archive(self, data):
        """process data from archiver main process

        Inserts the mail row, one row per recipient and one row per
        authorized mailbox in a single round trip, then reads back the
        year and pid assigned by the database.  All values are sent as
        bound parameters and data itself is left untouched.

        @param data: is a dict containing all needed stuff: m_attach,
                     m_size, m_sub, m_date, m_mid, m_from, m_rec, m_mboxes
        @return: year, pid, message on success; 0, 443, message when the
                 query fails or does not return exactly two columns"""
        limit = self._MAX_FIELD
        subject = self._strip_nul(data['m_sub'])[:limit].encode('utf-8', 'replace')
        message_id = self._strip_nul(data['m_mid'])[:limit]
        from_login, from_domain = self._split_address(data['m_from'])

        statements = [self._MAIL_SQL]
        params = [message_id,
                  from_login,
                  from_domain,
                  subject,
                  asctime(data['m_date']),
                  data['m_size'],
                  len(data['m_attach'])]

        for rec in data['m_rec']:
            statements.append(self._RECIPIENT_SQL)
            params.extend(self._split_address(rec))

        for mailbox in data['m_mboxes']:
            statements.append(self._AUTHORIZED_SQL)
            params.append(self._strip_nul(mailbox)[:limit])

        statements.append(self._PID_SQL)

        res, row, msg = self.do_query(''.join(statements), True, True,
                                      tuple(params))
        # fetchone() gives None when there is no row to read
        if not res or row is None or len(row) != 2:
            return 0, 443, msg
        return row[0], row[1], msg # year, pid, message

    def process_storage(self, data):
        """process storaging of mail on pgsql

        The mail is stored base64 encoded.  The query doesn't return rows
        but only result code.

        @param data: is a dict containing year, pid and mail from archiver
        @return: BACKEND_OK on success; 0, 443, message on failure"""
        params = (int(data['year']),
                  int(data['pid']),
                  encodestring(data['mail']))
        res, _rows, msg = self.do_query(self._STORAGE_SQL, params=params)
        if not res:
            return 0, 443, msg
        return BACKEND_OK

    def shutdown(self):
        """shutdown the PGSQL stage

        closes the pgsql connection and logs the shutdown"""
        self.close()
        self.LOG(E_ALWAYS, self._prefix + '(%s): closing connection' % self.type)