def sqldate(dateStr):
    parsed = parsedate(dateStr)
    if parsed is None:
        parsed = parsedate(dateStr + ' 00:00:00 -0000')
        if parsed is None:
            return None
    return '%04d-%02d-%02d' % (parsed[0], parsed[1], parsed[2])
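# A minimal usage sketch (assumed, not taken from the snippets themselves):
# it presumes `parsedate` is email.utils.parsedate (the legacy rfc822.parsedate
# behaves the same) and shows the 9-tuple it returns, which sqldate() above
# slices into a YYYY-MM-DD string; the trailing weekday/yearday/DST fields are
# placeholders, not real values.
from email.utils import parsedate

parsed = parsedate('Fri, 09 Nov 2001 01:08:47 -0000')
print(parsed)                                                  # (2001, 11, 9, 1, 8, 47, 0, 1, -1)
print('%04d-%02d-%02d' % (parsed[0], parsed[1], parsed[2]))    # 2001-11-09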
def _parsedate(date):
    """
    Parse date and return datetime object.

    @param date: date string in RFC 2822 format
    @return: datetime object
    """
    return datetime.datetime(*parsedate(date)[0:6])
def is_valid(message):
    """Check if a message is valid"""
    date_sent = time.mktime(parsedate(message.date_sent))
    current_time = time.mktime(time.localtime())
    recent_time = current_time - 1
    return ((recent_time <= date_sent) and message.from_ == MY_DIGITS)
def calculate_attachments_dir(mlist, msg, msgdata):
    # Calculate the directory that attachments for this message will go
    # under.  To avoid inode limitations, the scheme will be:
    # archives/private/<listname>/attachments/YYYYMMDD/<msgid-hash>/<files>
    # Start by calculating the date-based and msgid-hash components.
    fmt = "%Y%m%d"
    datestr = msg.get("Date")
    if datestr:
        now = parsedate(datestr)
    else:
        now = time.gmtime(msgdata.get("received_time", time.time()))
    datedir = safe_strftime(fmt, now)
    if not datedir:
        datestr = msgdata.get("X-List-Received-Date")
        if datestr:
            datedir = safe_strftime(fmt, datestr)
    if not datedir:
        # What next?  Unixfrom, I guess.
        parts = msg.get_unixfrom().split()
        try:
            month = {
                "Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4,
                "May": 5, "Jun": 6, "Jul": 7, "Aug": 8,
                "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12,
            }.get(parts[3], 0)
            day = int(parts[4])
            year = int(parts[6])
        except (IndexError, ValueError):
            # Best we can do I think
            month = day = year = 0
        datedir = "%04d%02d%02d" % (year, month, day)
    assert datedir
    if mm_cfg.SCRUBBER_ADD_PAYLOAD_HASH_FILENAME:
        return os.path.join("attachments", datedir)
    else:
        # As for the msgid hash, we'll base this part on the Message-ID: so
        # that all attachments for the same message end up in the same
        # directory (we'll uniquify the filenames in that directory as
        # needed).  We use the first 2 and last 2 bytes of the SHA1 hash of
        # the message id as the basis of the directory name.  Clashes here
        # don't really matter too much, and that still gives us a 32-bit
        # space to work with.
        msgid = msg["message-id"]
        if msgid is None:
            msgid = msg["Message-ID"] = Utils.unique_message_id(mlist)
        # We assume that the message id actually /is/ unique!
        digest = sha_new(msgid).hexdigest()
        return os.path.join("attachments", datedir, digest[:4] + digest[-4:])
def parse_http_date(date_string):
    """
    Converts an HTTP datetime string into a Python datetime object.
    Doesn't support every single format, but it's good enough.
    """
    try:
        return datetime.datetime(*parsedate(date_string)[:6])
    except Exception:
        return None
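# A hedged usage sketch for parse_http_date() above, assuming the snippet's
# own imports (datetime and parsedate) are in scope; the date strings are
# illustrative only.
print(parse_http_date('Sun, 06 Nov 1994 08:49:37 GMT'))
# -> datetime.datetime(1994, 11, 6, 8, 49, 37); the zone is discarded because
#    parsedate() ignores it and [:6] keeps only year..second.
print(parse_http_date('not a date'))
# -> None, since parsedate() returns None and the slice raises inside the try.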
def parse_http_date(date_string):
    """
    Converts an HTTP datetime string into a Python datetime object.
    Doesn't support every single format, but it's good enough.
    """
    try:
        return datetime.datetime(*parsedate(date_string)[:6])
    except:
        return None
def date(self):
    """return an mx.DateTime object for the email's date or None
    if no date is set or if it can't be parsed
    """
    value = self.get('date')
    if value:
        datetuple = parsedate(value)
        if datetuple:
            return DateTime(*datetuple[:6])
    return None
def calculate_attachments_dir(mlist, msg, msgdata):
    # Calculate the directory that attachments for this message will go
    # under.  To avoid inode limitations, the scheme will be:
    # archives/private/<listname>/attachments/YYYYMMDD/<msgid-hash>/<files>
    # Start by calculating the date-based and msgid-hash components.
    fmt = '%Y%m%d'
    datestr = msg.get('Date')
    if datestr:
        now = parsedate(datestr)
    else:
        now = time.gmtime(msgdata.get('received_time', time.time()))
    datedir = safe_strftime(fmt, now)
    if not datedir:
        datestr = msgdata.get('X-List-Received-Date')
        if datestr:
            datedir = safe_strftime(fmt, datestr)
    if not datedir:
        # What next?  Unixfrom, I guess.
        parts = msg.get_unixfrom().split()
        try:
            month = {
                'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4,
                'May': 5, 'Jun': 6, 'Jul': 7, 'Aug': 8,
                'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12,
            }.get(parts[3], 0)
            day = int(parts[4])
            year = int(parts[6])
        except (IndexError, ValueError):
            # Best we can do I think
            month = day = year = 0
        datedir = '%04d%02d%02d' % (year, month, day)
    assert datedir
    # As for the msgid hash, we'll base this part on the Message-ID: so that
    # all attachments for the same message end up in the same directory (we'll
    # uniquify the filenames in that directory as needed).  We use the first 2
    # and last 2 bytes of the SHA1 hash of the message id as the basis of the
    # directory name.  Clashes here don't really matter too much, and that
    # still gives us a 32-bit space to work with.
    msgid = msg['message-id']
    if msgid is None:
        msgid = msg['Message-ID'] = Utils.unique_message_id(mlist)
    # We assume that the message id actually /is/ unique!
    digest = sha_new(msgid).hexdigest()
    # Hash disabled to handle file duplicates over multiple emails.
    # return os.path.join('attachments', datedir, digest[:4] + digest[-4:])
    return os.path.join('attachments', datedir)
def __init__(self, data):
    UserDict.UserDict.__init__(self)
    self.msg = email.message_from_string(data)
    self.boundary = self.msg['Boundary']
    self.payload = self.msg.get_payload().split('--%s--' % self.boundary)
    self['from'] = self.msg['From']
    self['date'] = parsedate(self.msg['Date'])
    self['subject'] = self.msg['Subject']
    # Parse the hex flags directly instead of eval()'ing '0x' + value,
    # which the original did; int(..., 16) gives the same result safely.
    self['flags'] = int(self.msg['X-MRIM-Flags'], 16)
    self['version'] = self.msg['Version']
    self['message'] = utils.win2str(self.payload[0].strip())
    self['rtf-message'] = self.payload[1].strip()
def _parse_http_date(instr):
    """
    @param instr: HTTP date string
    @type instr: string
    @return: seconds since the epoch
    @rtype: int
    """
    date_tuple = parsedate(instr)
    # http://sourceforge.net/tracker/index.php?func=detail&aid=1194222&group_id=5470&atid=105470
    if date_tuple[0] < 100:
        if date_tuple[0] > 68:
            date_tuple = (date_tuple[0] + 1900,) + date_tuple[1:]
        else:
            date_tuple = (date_tuple[0] + 2000,) + date_tuple[1:]
    return calendar.timegm(date_tuple)
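# A sketch of the two-digit-year handling above (illustrative values only):
# older parsedate() implementations returned "94" unchanged (see the bug link
# in the snippet), so the helper maps 69-99 to 19xx and 00-68 to 20xx before
# calendar.timegm(); newer versions already normalise the year, in which case
# the branch is simply skipped and the result is the same.
print(_parse_http_date('Sunday, 06-Nov-94 08:49:37 GMT'))  # RFC 850 style
print(_parse_http_date('Sun, 06 Nov 1994 08:49:37 GMT'))   # RFC 1123 style
# Both should print the same epoch value, 784111777.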
def extractTime(self):
    """When we create a new copy of a message, we need to specify a
    timestamp for the message, if we can't get the information from the
    IMAP server itself.  If the message has a valid date header we use
    that.  Otherwise, we use the current time."""
    message_date = self["Date"]
    if message_date is not None:
        parsed_date = parsedate(message_date)
        if parsed_date is not None:
            try:
                return Time2Internaldate(time.mktime(parsed_date))
            except ValueError:
                pass
            except OverflowError:
                pass
    return Time2Internaldate(time.time())
def parse_date(self, date):
    # data = parsedate(date)
    try:
        # trying to prevent the rfc822.parsedate bug with Tue,26 instead of Tue, 26
        secs = mktime_tz(parsedate_tz(date))
    except:
        secs = time()
    data = parsedate(ctime(secs))
    if data[3] < 10:
        ora = "0" + repr(data[3])
    else:
        ora = repr(data[3])
    if data[4] < 10:
        minuti = "0" + repr(data[4])
    else:
        minuti = repr(data[4])
    return repr(data[2]) + "/" + repr(data[1]) + "/" + repr(data[0]) \
        + " " + ora + ":" + minuti, secs
def calculate_attachments_dir(mlist, msg, msgdata):
    # Calculate the directory that attachments for this message will go
    # under.  To avoid inode limitations, the scheme will be:
    # archives/private/<listname>/attachments/YYYYMMDD/<msgid-hash>/<files>
    # Start by calculating the date-based and msgid-hash components.
    fmt = '%Y%m%d'
    datestr = msg.get('Date')
    if datestr:
        now = parsedate(datestr)
    else:
        now = time.gmtime(msgdata.get('received_time', time.time()))
    datedir = safe_strftime(fmt, now)
    if not datedir:
        datestr = msgdata.get('X-List-Received-Date')
        if datestr:
            datedir = safe_strftime(fmt, datestr)
    if not datedir:
        # What next?  Unixfrom, I guess.
        parts = msg.get_unixfrom().split()
        try:
            month = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4,
                     'May': 5, 'Jun': 6, 'Jul': 7, 'Aug': 8,
                     'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12,
                     }.get(parts[3], 0)
            day = int(parts[4])
            year = int(parts[6])
        except (IndexError, ValueError):
            # Best we can do I think
            month = day = year = 0
        datedir = '%04d%02d%02d' % (year, month, day)
    assert datedir
    # As for the msgid hash, we'll base this part on the Message-ID: so that
    # all attachments for the same message end up in the same directory (we'll
    # uniquify the filenames in that directory as needed).  We use the first 2
    # and last 2 bytes of the SHA1 hash of the message id as the basis of the
    # directory name.  Clashes here don't really matter too much, and that
    # still gives us a 32-bit space to work with.
    msgid = msg['message-id']
    if msgid is None:
        msgid = msg['Message-ID'] = Utils.unique_message_id(mlist)
    # We assume that the message id actually /is/ unique!
    digest = sha_new(msgid).hexdigest()
    return os.path.join('attachments', datedir, digest[:4] + digest[-4:])
def date(self, alternative_source=False, return_str=False):
    """return a datetime object for the email's date or None if no date is
    set or if it can't be parsed
    """
    value = self.get('date')
    if value is None and alternative_source:
        unix_from = self.message.get_unixfrom()
        if unix_from is not None:
            try:
                value = unix_from.split(" ", 2)[2]
            except IndexError:
                pass
    if value is not None:
        datetuple = parsedate(value)
        if datetuple:
            if lgc.USE_MX_DATETIME:
                return DateTime(*datetuple[:6])
            return datetime(*datetuple[:6])
        elif not return_str:
            return None
    return value
def extractTime(self):
    # When we create a new copy of a message, we need to specify
    # a timestamp for the message.  If the message has a valid date
    # header we use that.  Otherwise, we use the current time.
    message_date = self["Date"]
    if message_date is not None:
        parsed_date = parsedate(message_date)
        if parsed_date is not None:
            try:
                return Time2Internaldate(time.mktime(parsed_date))
            except ValueError:
                # Invalid dates can cause mktime() to raise a
                # ValueError, for example:
                #   >>> time.mktime(parsedate("Mon, 06 May 0102 10:51:16 -0100"))
                #   Traceback (most recent call last):
                #     File "<interactive input>", line 1, in ?
                #   ValueError: year out of range
                # (Why this person is getting mail from almost two
                # thousand years ago is another question <wink>).
                # In any case, we just pass and use the current date.
                pass
            except OverflowError:
                pass
    return Time2Internaldate(time.time())
def cache_sort(i):
    return datetime.fromtimestamp(time.mktime(parsedate(i[1][1]['Date'])))
def parse_date_format(date):
    try:
        return parsedate(date)
    except Exception:
        raise FormatException(ugettext('Could not parse the date "%s"') % date)
def getDateTime(self):
    return parsedate(self._msg.get('date', None))
def populate(incoming):
    """ Populate the error table with the incoming error """
    # special lookup the account
    err = Error()
    uid = incoming.get("account", "")
    if not settings.ANONYMOUS_POSTING:
        if not uid:
            raise ValueError, "Missing the required account number."
        if str(uid) != settings.ARECIBO_PUBLIC_ACCOUNT_NUMBER:
            raise ValueError, "Account number does not match"

    # special
    if incoming.has_key("url"):
        for k, v in break_url(incoming["url"]).items():
            setattr(err, k, v)

    # check the status codes
    if incoming.has_key("status"):
        status = str(incoming["status"])
        try:
            valid_status(status)
            err.status = status
        except StatusDoesNotExist:
            err.errors += "Status does not exist, ignored.\n"

    # not utf-8 encoded
    for src, dest in [("ip", "ip"),
                      ("user_agent", "user_agent"),
                      ("uid", "uid")]:
        actual = incoming.get(src, None)
        if actual is not None:
            setattr(err, dest, str(actual))

    try:
        priority = int(incoming.get("priority", 0))
    except ValueError:
        priority = 0
    err.priority = min(priority, 10)

    # possibly utf-8 encoding
    for src, dest in [("type", "type"),
                      ("msg", "msg"),
                      ("server", "server"),
                      ("traceback", "traceback"),
                      ("request", "request"),
                      ("username", "username")]:
        actual = incoming.get(src, None)
        if actual is not None:
            try:
                setattr(err, dest, actual.encode("utf-8"))
            except UnicodeDecodeError:
                err.errors += "Encoding error on the %s field, ignored.\n" % src

    # timestamp handling
    if incoming.has_key("timestamp"):
        tmstmp = incoming["timestamp"].strip()
        if tmstmp.endswith("GMT"):
            tmstmp = tmstmp[:-3] + "-0000"
        tme = parsedate(tmstmp)
        if tme:
            try:
                final = datetime(*tme[:7])
                err.error_timestamp = final
                err.error_timestamp_date = final.date()
            except ValueError, msg:
                err.errors += 'Date error on the field "%s", ignored.\n' % msg
def extractTag(self, call):
    ims = parsedate(request.headers.get('If-Modified-Since', None))
    return (request.headers.get('If-None-Match', None),
            datetime(ims[0], ims[1], ims[2], ims[3], ims[4], ims[5])
            if ims is not None else None)
def getPubDate(enclosure):
    item = enclosure.parentNode
    pubDates = item.getElementsByTagName('pubDate')
    if not pubDates:
        return None
    return time.mktime(parsedate(getText(pubDates[0])))
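# Note on getPubDate() above: time.mktime() treats the parsed 9-tuple as local
# time and ignores any zone offset in the pubDate value. Where a zone-aware
# timestamp is wanted, parsedate_tz/mktime_tz is the usual alternative; a small
# sketch with an assumed helper name (not from the original source):
from email.utils import parsedate_tz, mktime_tz

def getPubTimestampUTC(pub_date_text):
    # Seconds since the epoch, honouring the offset given in the date string.
    parsed = parsedate_tz(pub_date_text)
    return mktime_tz(parsed) if parsed else None

print(getPubTimestampUTC('Wed, 02 Oct 2002 13:00:00 +0200'))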
def populate(err, incoming):
    """ Populate the error table with the incoming error """
    # special lookup the account
    uid = incoming.get("account", "")
    if not uid:
        raise ValueError, "Missing the required account number."
    if str(uid) != settings.ARECIBO_PUBLIC_ACCOUNT_NUMBER:
        raise ValueError, "Account number does not match"

    # special
    if incoming.has_key("url"):
        for k, v in break_url(incoming["url"]).items():
            setattr(err, k, v)

    # check the status codes
    if incoming.has_key("status"):
        status = str(incoming["status"])
        try:
            valid_status(status)
            err.status = status
        except StatusDoesNotExist:
            err.errors += "Status does not exist, ignored.\n"

    # not utf-8 encoded
    for src, dest in [("ip", "ip"),
                      ("user_agent", "user_agent"),
                      ("uid", "uid")]:
        actual = incoming.get(src, None)
        if actual is not None:
            setattr(err, dest, str(actual))

    try:
        priority = int(incoming.get("priority", 0))
    except ValueError:
        priority = 0
    err.priority = min(priority, 10)

    # possibly utf-8 encoding
    for src, dest in [("type", "type"),
                      ("msg", "msg"),
                      ("server", "server"),
                      ("traceback", "traceback"),
                      ("request", "request"),
                      ("username", "username")]:
        actual = incoming.get(src, None)
        if actual is not None:
            try:
                setattr(err, dest, actual.encode("utf-8"))
            except UnicodeDecodeError:
                err.errors += "Encoding error on the %s field, ignored.\n" % src

    # timestamp handling
    if incoming.has_key("timestamp"):
        tmstmp = incoming["timestamp"].strip()
        if tmstmp.endswith("GMT"):
            tmstmp = tmstmp[:-3] + "-0000"
        tme = parsedate(tmstmp)
        if tme:
            try:
                final = datetime(*tme[:7])
                err.error_timestamp = final
            except ValueError, msg:
                err.errors += 'Date error on the field "%s", ignored.\n' % msg
def get_details(req, parsed):
    rss = get_first_tag(parsed, 'rss')

    category_els = rss.getElementsByTagName('itunes:category')
    categories = []
    for cat_el in category_els:
        cat = cat_el.getAttribute('text')
        if not cat:
            continue
        if cat_el.parentNode.nodeName.lower() == 'itunes:category':
            if not cat_el.parentNode.getAttribute('text'):
                continue
            cat = '%s/%s' % (cat_el.parentNode.getAttribute('text'), cat)
        categories.append(cat)

    items = []
    data = {
        # Required RSS elements
        'name': first_tag_text(rss, 'title'),
        'homepage': first_tag_text(rss, 'link'),
        'description': first_tag_text(rss, 'description'),
        # Optional RSS elements
        'language': first_tag_text(rss, 'language', 'en-US'),
        'copyright': first_tag_text(rss, 'copyright', ''),
        'subtitle': first_tag_text(rss, 'itunes:subtitle', ''),
        'author_name': first_tag_text(rss, 'itunes:author', req.user.username),
        'is_explicit': first_tag_bool(rss, 'itunes:explicit'),
        'cover_image': first_tag_attr(rss, 'itunes:image', 'href'),
        'categories': categories,
        # Note: this duplicate key overrides the 'copyright' entry above.
        'copyright': first_tag_text(rss, 'dc:copyright', ''),
        'items': items,
        '__ignored_items': 0,
    }

    item_nodes = rss.getElementsByTagName('item')
    if not item_nodes:
        raise FormatException(ugettext('No <item> nodes in the feed were found'))

    for node in item_nodes:
        audio_url = first_tag_attr(node, 'enclosure', 'url')
        if not audio_url:
            data['__ignored_items'] += 1
            continue

        duration = first_tag_text(node, 'itunes:duration', '0:00')
        dur_tup = map(int, duration.split(':'))
        if len(dur_tup) == 1:
            dur_seconds = dur_tup[0]
        elif len(dur_tup) == 2:
            dur_seconds = dur_tup[0] * 60 + dur_tup[1]
        else:
            dur_seconds = dur_tup[-3] * 3600 + dur_tup[-2] * 60 + dur_tup[-1]

        items.append({
            'title': first_tag_text(node, 'title'),
            'description': first_tag_text(node, 'description'),
            'subtitle': first_tag_text(node, 'itunes:subtitle', ''),
            'publish': parsedate(first_tag_text(node, 'pubDate')),
            'image_url': first_tag_attr(node, 'itunes:image', 'href', ''),
            'duration': dur_seconds,
            'audio_url': audio_url,
            'audio_size': first_tag_attr(node, 'enclosure', 'length'),
            'audio_type': first_tag_attr(node, 'enclosure', 'type'),
            'copyright': first_tag_text(node, 'dc:copyright', ''),
            'license': first_tag_text(node, 'dc:rights', ''),
        })

    return data
def recibir_email(config):
    # Open the connection to the POP3 server
    print "Email", config.email
    m = poplib.POP3(config.servidor_pop3, str(config.puerto_pop3))
    m.user(config.email)
    m.pass_(config.clave_email)
    # count unread mail
    # fetch the messages so they can be analysed
    attachments = []
    msglist = []
    poplist = m.list()
    print poplist
    if poplist[0].startswith('+OK'):
        msglist = poplist[1]
    numero = len(msglist)
    print(numero)
    for i in range(numero):
        print "Mensaje numero" + str(i + 1)
        print "--------------------"
        # Read the message and parse it
        response, headerLines, bytes = m.retr(i + 1)
        mensaje = '\n'.join(headerLines)
        p = Parser()
        email = p.parsestr(mensaje)
        print(parseaddr(email['From'])[1])
        # remitentes = config.lista_blanca.split(',')
        # print remitentes
        # for remitente in remitentes:
        try:
            # print (parseaddr(email['From'])[1].index(remitente))
            # if parseaddr(email['From'])[1].index(remitente) >= 0:
            # Format the time so it can be stored in the DB
            a = parsedate(email['Date'])
            b = time.mktime(a)
            c = datetime.fromtimestamp(int(b)).strftime('%Y-%m-%d %H:%M')
            # ''.join([unicode(t[0], t[1] or default_charset) for t in dh])
            # Decide whether the body is HTML or plain ASCII text
            # default_charset = 'ascii'
            tit = decode_header(email['Subject'])
            default_charset = 'ASCII'
            tipo_mail = tit[0][1]
            t_ = unicode("")
            tit = unidecode(
                t_.join([unicode(t[0], t[1] or default_charset) for t in tit]))
            is_html = False
            for part in email.walk():
                parte = ''
                print "content type:", part.get_content_type()
                if part.get_content_type() == "text/html" \
                        or part.get_content_type() == "text/plain":
                    if part.get_content_type() == "text/html":
                        is_html = True
                        charset = part.get_content_charset(failobj="utf-8")
                        print "charset.", charset
                        print part.get_content_type()
                        print "part pre sanitise", part
                        part = sanitise(part)
                        parte_utf = unicode(part.get_payload(decode=True),
                                            str(charset), "ignore")\
                            .encode('utf8', 'replace')
                        print "part post sanitise", parte_utf
                    if not is_html:
                        charset = part.get_content_charset()
                        print "charset.", charset
                        print part.get_content_type()
                        print "part pre sanitise", part
                        part = sanitise(part)
                        parte_utf = unicode(part.get_payload(decode=True),
                                            str(charset), "ignore")\
                            .encode('utf8', 'replace')
                        print "part post sanitise", parte_utf
                else:
                    # print 'Texto plano'
                    # part = sanitise(part)
                    # parte = part.get_payload(decode=True)
                    print "attach type:", part.get_content_type()
                    attach = get_attach(part)
                    if attach:
                        attachments.append(attach)
            # else:
            #     print "ascii way"
            #     for part in email.walk():
            #         parte = ''
            #         if part.get_content_type() == "text/html" or part.get_content_type() == "text/plain":
            #             print 'html'
            #             part = sanitise(part)
            #             parte_ascii = unicode(part.get_payload(decode=True))
            #         else:
            #             # print 'Texto plano'
            #             # part = sanitise(part)
            #             # parte = part.get_payload(decode=True)
            #             print "attach type:", part.get_content_type()
            #             attach = get_attach(part)
            #             if attach:
            #                 attachments.append(attach)
            # Save an instance of the Mail
            print "titulo", tit
            print "detalle", parte_utf
            print "connect db..."
            try:
                db.connect()
            except Exception, e:
                print "DB re Connect error", e
            print "db ok"
            cuenta = config.nombre
            noti = NotificadorExterno()
            noti.creado_por = 1
            # noti.asignado_a = 8
            noti.destinatario = cuenta
            noti.creado_el = datetime.now().strftime('%Y-%m-%d %H:%M')
            noti.fecha_hora_inicio = noti.creado_el
            noti.asunto = tit[:255]
            noti.actividad = 'Email'
            noti.estado = 1
            noti.e_mail = parseaddr(email['From'])[1]
            try:
                noti.detalle = parte_utf
            except Exception, e:
                print "error detalle", e
                noti.detalle = ""
            try:
                '''if NotificadorExterno.select().where():
                    print 'YA EXISTE'
                else:'''
                print "pre save", noti.e_mail
                noti.save()
                print "save ok!"
                print noti.id
                print 'Se grabo!!!'
                auditable = []
                print "adjuntos:", attachments
                for attach in attachments:
                    print "save attach!"
                    folder = get_folder(noti.id)
                    pathfile = save_attach(folder, attach)
                    attach_db = save_attach_db(noti.id, attach.name, pathfile)
                    audit = create_attach_link(attach_db)
                    auditable.append(audit)
                if auditable:
                    noti.adjuntos = "\n".join(auditable)
                    noti.save()
                    print "save auditable"
                m.dele(i + 1)
            except Exception, e:
                print('NO SE GRABO!!!!')
                print repr(e)
        except Exception, e:
            # NOTE: the matching except clause for this outer try was lost in
            # the flattened excerpt; this minimal handler is an assumption
            # added so the snippet stays syntactically valid.
            print repr(e)
def populate(err, incoming):
    # special lookup the account
    uid = incoming.get("account", "")
    if not uid:
        raise ValueError, "Missing the required account number."
    if str(uid) != settings.ARECIBO_PUBLIC_ACCOUNT_NUMBER:
        raise ValueError, "Account number does not match"

    # special
    if incoming.has_key("url"):
        err.raw = incoming["url"]
        parsed = list(urlparse(incoming["url"]))
        err.protocol, err.domain = parsed[0], parsed[1]
        err.query = urlunparse(["", ""] + parsed[2:])

    # check the status codes
    if incoming.has_key("status"):
        status = str(incoming["status"])
        try:
            valid_status(status)
            err.status = status
        except StatusDoesNotExist:
            err.errors += "Status does not exist, ignored.\n"

    # not utf-8 encoded
    for src, dest in [("ip", "ip"),
                      ("user_agent", "user_agent"),
                      ("uid", "uid")]:
        actual = incoming.get(src, None)
        if actual is not None:
            setattr(err, dest, actual)

    try:
        priority = int(incoming["priority"] or 0)
    except KeyError:
        # key not found
        priority = 0
    except ValueError:
        # int conversion error
        priority = 0
    err.priority = min(priority, 10)

    # possibly utf-8 encoding
    for src, dest in [("type", "type"),
                      ("msg", "msg"),
                      ("server", "server"),
                      ("traceback", "traceback"),
                      ("request", "request"),
                      ("username", "username")]:
        actual = incoming.get(src, None)
        if actual is not None:
            try:
                setattr(err, dest, actual.encode("utf-8"))
            except UnicodeDecodeError:
                err.errors += "Encoding error on the %s field, ignored.\n" % src

    # timestamp handling
    if incoming.has_key("timestamp"):
        tmstmp = incoming["timestamp"].strip()
        if tmstmp.endswith("GMT"):
            tmstmp = tmstmp[:-3] + "-0000"
        tme = parsedate(tmstmp)
        if tme:
            try:
                final = datetime(*tme[:7])
                err.error_timestamp = final
            except ValueError, msg:
                err.errors += 'Date error on the field "%s", ignored.\n' % msg