def __call__(self, environ, start_response):
    """Respond to a request when called in the usual WSGI way."""
    if environ['REQUEST_METHOD'] not in ('GET', 'HEAD'):
        headers = [('Allow', 'GET, HEAD')]
        return self.method_not_allowed(environ, start_response, headers)
    path_info = environ.get('PATH_INFO', '')
    full_path = self._full_path(path_info)
    if path_info.startswith('/exec'):
        from subprocess import Popen, PIPE, STDOUT
        import urllib
        query = environ.get('QUERY_STRING')
        args = []
        cwd = '.'
        for var in query.split('&'):
            split = var.split('=')
            if split[0] == 'args':
                args = urllib.unquote_plus(split[1]).split(' ')
            if split[0] == 'cwd':
                cwd = split[1]
        print cwd
        print args
        proc = Popen(args, stdout=PIPE, stderr=STDOUT, cwd=cwd)
        proc.wait()
        headers = [('Date', rfc822.formatdate(time.time())),
                   ('Content-Type', 'text/plain')]
        start_response("200 OK", headers)
        return proc.stdout.readlines()
    if not self._is_under_root(full_path):
        return self.not_found(environ, start_response)
    if path.isdir(full_path):
        if full_path[-1] != '/' or full_path == self.root:
            location = util.request_uri(environ, include_query=False) + '/'
            if environ.get('QUERY_STRING'):
                location += '?' + environ.get('QUERY_STRING')
            headers = [('Location', location)]
            return self.moved_permanently(environ, start_response, headers)
        else:
            full_path = self._full_path(path_info + self.index_file)
    content_type = self._guess_type(full_path)
    try:
        etag, last_modified = self._conditions(full_path, environ)
        headers = [('Date', rfc822.formatdate(time.time())),
                   ('Last-Modified', last_modified),
                   ('ETag', etag)]
        if_modified = environ.get('HTTP_IF_MODIFIED_SINCE')
        if if_modified and (rfc822.parsedate(if_modified)
                            >= rfc822.parsedate(last_modified)):
            return self.not_modified(environ, start_response, headers)
        if_none = environ.get('HTTP_IF_NONE_MATCH')
        if if_none and (if_none == '*' or etag in if_none):
            return self.not_modified(environ, start_response, headers)
        file_like = self._file_like(full_path)
        headers.append(('Content-Type', content_type))
        start_response("200 OK", headers)
        if environ['REQUEST_METHOD'] == 'GET':
            return self._body(full_path, environ, file_like)
        else:
            return ['']
    except (IOError, OSError), e:
        print e
        return self.not_found(environ, start_response)
def process_users(self, items):
    users = self.session.users
    for (i, item) in enumerate(items):
        # Tweets
        if 'user' in item:
            uid = unicode(item['user']['id'])
            if uid not in users:
                with self.session.storage_lock:
                    users[uid] = {}
            if ('_last_update' not in users[uid]
                    or time.mktime(rfc822.parsedate(item['created_at']))
                    > time.mktime(rfc822.parsedate(users[uid]['_last_update']))):
                item['user']['_last_update'] = item['created_at']
                with self.session.storage_lock:
                    users[uid].update(item['user'])
            item['user'] = users[uid]
        # Retweets
        if 'retweeted_status' in item and 'user' in item['retweeted_status']:
            rid = unicode(item['retweeted_status']['user']['id'])
            if rid not in users:
                with self.session.storage_lock:
                    users[rid] = {}
            if ('_last_update' not in users[rid]
                    or time.mktime(rfc822.parsedate(item['retweeted_status']['created_at']))
                    > time.mktime(rfc822.parsedate(users[rid]['_last_update']))):
                item['retweeted_status']['user']['_last_update'] = item['retweeted_status']['created_at']
                with self.session.storage_lock:
                    users[rid].update(item['retweeted_status']['user'])
            item['retweeted_status']['user'] = users[rid]
        # Direct messages
        if 'sender' in item:
            sid = unicode(item['sender']['id'])
            if sid not in users:
                with self.session.storage_lock:
                    users[sid] = {}
            if ('_last_update' not in users[sid]
                    or time.mktime(rfc822.parsedate(item['created_at']))
                    > time.mktime(rfc822.parsedate(users[sid]['_last_update']))):
                item['sender']['_last_update'] = item['created_at']
                with self.session.storage_lock:
                    users[sid].update(item['sender'])
            item['sender'] = users[sid]
        if 'recipient' in item:
            rcid = unicode(item['recipient']['id'])
            if rcid not in users:
                with self.session.storage_lock:
                    users[rcid] = {}
            if ('_last_update' not in users[rcid]
                    or time.mktime(rfc822.parsedate(item['created_at']))
                    > time.mktime(rfc822.parsedate(users[rcid]['_last_update']))):
                item['recipient']['_last_update'] = item['created_at']
                with self.session.storage_lock:
                    users[rcid].update(item['recipient'])
            item['recipient'] = users[rcid]
    return items
def __call__(self, environ, start_response):
    """Respond to a request when called in the usual WSGI way."""
    path_info = environ.get('PATH_INFO', '')
    full_path = self._full_path(path_info)
    if not self._is_under_root(full_path):
        return self.not_found(environ, start_response)
    if path.isdir(full_path):
        if full_path[-1] != '/' or full_path == self.root:
            location = util.request_uri(environ, include_query=False) + '/'
            if environ.get('QUERY_STRING'):
                location += '?' + environ.get('QUERY_STRING')
            headers = [('Location', location)]
            return self.moved_permanently(environ, start_response, headers)
        else:
            full_path = self._full_path(path_info + self.index_file)
    try:
        sz = int(environ['CONTENT_LENGTH'])
    except:
        sz = 0
    if environ['REQUEST_METHOD'] == 'PUT' and sz > 0:
        for putglob in self.puttable:
            if fnmatch(path_info, putglob):
                data = environ['wsgi.input'].read(sz)
                try:
                    with open(full_path, "wb") as f:
                        f.write(data)
                    return self.success_no_content(environ, start_response)
                except:
                    print sys.exc_info()[1]
                    return self.server_error(environ, start_response)
    if environ['REQUEST_METHOD'] not in ('GET', 'HEAD'):
        headers = [('Allow', 'GET, HEAD')]
        return self.method_not_allowed(environ, start_response, headers)
    content_type = self._guess_type(full_path)
    try:
        etag, last_modified = self._conditions(full_path, environ)
        headers = [('Date', rfc822.formatdate(time.time())),
                   ('Last-Modified', last_modified),
                   ('ETag', etag)]
        if_modified = environ.get('HTTP_IF_MODIFIED_SINCE')
        if if_modified and (rfc822.parsedate(if_modified)
                            >= rfc822.parsedate(last_modified)):
            return self.not_modified(environ, start_response, headers)
        if_none = environ.get('HTTP_IF_NONE_MATCH')
        if if_none and (if_none == '*' or etag in if_none):
            return self.not_modified(environ, start_response, headers)
        file_like = self._file_like(full_path)
        headers.append(('Content-Type', content_type))
        start_response("200 OK", headers)
        if environ['REQUEST_METHOD'] == 'GET':
            return self._body(full_path, environ, file_like)
        else:
            return ['']
    except (IOError, OSError), e:
        print e
        return self.not_found(environ, start_response)
def guardar(RSS, resource, num, usuario):
    if resource == "rss":
        rss = RSS
    else:
        rss = resource
    try:
        d = feedparser.parse(rss)
        title = d.feed.title
        url = d.feed.link
        Rss = True
    except:
        Rss = False
    if Rss:
        try:
            logo = '<img src="'+d.feed.image.href+'">'
        except:
            logo = ""
        date = d.feed.published
        date = datetime.datetime.fromtimestamp(calendar.timegm(rfc822.parsedate(date)))
        # Save the new channels from /canales
        try:
            T_Canal = Channels.objects.get(Title=title)
            T_Canal.Logo = logo
            T_Canal.RSS = rss
            T_Canal.Date = date
            T_Canal.Url = url
            T_Canal.NumMensaje = T_Canal.NumMensaje
        except Channels.DoesNotExist:
            T_Canal = Channels(Title=title, Logo=logo, RSS=rss, Date=date,
                               Url=url, NumMensaje=0)
        T_Canal.save()
        numNoticias = 0
        # Save the new items from /canales/num
        for N_noticia in d.entries:
            titulo = N_noticia.title
            link = N_noticia.link
            date2 = N_noticia.published
            date2 = datetime.datetime.fromtimestamp(calendar.timegm(rfc822.parsedate(date2)))
            descripcion = N_noticia.description
            titular = d.feed.title
            try:
                T_num = CanalesNum.objects.get(Titulo=titulo, CanalId=T_Canal.id)
                T_num.Link = link
                T_num.Date = date2
                T_num.Descripcion = descripcion
                T_num.Titular = titular
            except CanalesNum.DoesNotExist:
                T_num = CanalesNum(Titulo=titulo, CanalId=T_Canal.id, Link=link,
                                   Date=date2, Descripcion=descripcion, Titular=titular)
                numNoticias = numNoticias + 1
            T_num.save()
        T_Canal.NumMensaje = T_Canal.NumMensaje + numNoticias
        T_Canal.save()
    # If the refresh button on /canales/num was pressed
    if resource != "rss":
        return HttpResponseRedirect("http://localhost:1234/canales/"+num)
def index(self, req):
    """Handle GET and HEAD requests for static files. Directory requests are not allowed."""
    static_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), '../static/'))
    # filter out ..
    try:
        static_path = req.urlvars['path'].replace('/..', '')
    except:
        return HTTPForbidden()
    path = os.path.join(static_dir, static_path)
    if os.path.isdir(path):
        return HTTPForbidden()
    if req.method == 'GET' or req.method == 'HEAD':
        if os.path.isfile(path):
            etag, modified, mime_type, size = self._get_stat(path)
            res = Response()
            res.headers['content-type'] = mime_type
            res.date = rfc822.formatdate(time.time())
            res.last_modified = modified
            res.etag = etag
            # The WebOb-style request exposes conditional headers by their
            # HTTP names, not by their WSGI environ keys.
            if_modified_since = req.headers.get('If-Modified-Since')
            if if_modified_since:
                if rfc822.parsedate(if_modified_since) >= rfc822.parsedate(modified):
                    return HTTPNotModified()
            if_none_match = req.headers.get('If-None-Match')
            if if_none_match:
                if if_none_match == '*' or etag in if_none_match:
                    return HTTPNotModified()
            # set the response body
            if req.method == 'GET':
                fd = open(path, 'rb')
                if 'wsgi.file_wrapper' in req.environ:
                    res.app_iter = req.environ['wsgi.file_wrapper'](fd)
                    res.content_length = size
                else:
                    res.app_iter = iter(lambda: fd.read(8192), '')
                    res.content_length = size
            else:
                res.body = ''
            return res
        else:
            return None
    else:
        return None
def guardar(RSS, resource, num, usuario):
    if resource == "rss":
        rss = RSS
    else:
        rss = resource
    try:
        d = feedparser.parse(rss)
        Rss = True
    except UnboundLocalError:
        Rss = False
    if Rss:
        title = d.feed.title
        url = d.feed.link
        Num_Mensaje = len(d.entries)
        try:
            logo = '<img src="'+d.feed.image.href+'">'
        except:
            logo = ""
        date = d.feed.published
        date = datetime.datetime.fromtimestamp(calendar.timegm(rfc822.parsedate(date)))
        try:
            T_Canal = Channels.objects.get(Title=title)
            T_Canal.Logo = logo
            T_Canal.RSS = rss
            T_Canal.Date = date
            T_Canal.Url = url
            T_Canal.NumMensaje = Num_Mensaje
        except Channels.DoesNotExist:
            T_Canal = Channels(Title=title, Logo=logo, RSS=rss, Date=date,
                               Url=url, NumMensaje=Num_Mensaje)
        T_Canal.save()
        for N_noticia in d.entries:
            titulo = N_noticia.title
            link = N_noticia.link
            date = N_noticia.published
            date = datetime.datetime.fromtimestamp(calendar.timegm(rfc822.parsedate(date)))
            descripcion = N_noticia.description
            titular = d.feed.title
            try:
                T_num = CanalNum.objects.get(Titulo=titulo, Titular=titular)
                T_num.Link = link
                T_num.Date = date
                T_num.Descripcion = descripcion
            except CanalNum.DoesNotExist:
                T_num = CanalNum(Titulo=titulo, Titular=titular, Link=link,
                                 Date=date, Descripcion=descripcion)
            T_num.save()
    if resource != "rss":
        return HttpResponseRedirect("http://localhost:1234/canales/"+num)
def update_rssfeed():
    pf = open(planetfile)
    data = pickle.load(pf)
    pf.close()
    entries = list(reversed(sorted([(x.pubDatedate, x) for x in data.entries])))[:15]
    resources = []
    for key, entry in entries:
        resources.append(entry.link)
    rdf_li_resources = '\n'.join(['<rdf:li resource="%s" />' % x for x in resources])
    template_item = templates.rdf_item
    template_subject = '<dc:subject>%s</dc:subject>'
    template_date = '<dc:date>%s</dc:date>'
    template_items = []
    for key, entry in entries:
        subjects = ''
        if entry.categories:
            subjects = '\n'.join([template_subject % x for x in entry.categories])
        entry.subjects = subjects
        date = rfc822.parsedate(entry.publishedDate)
        entry.dcdate = template_date % time.strftime("%Y-%m-%dT%H:%M:%S-07:00", date)
        template_items.append(template_item % entry.__dict__)
    items_rdf_about = '\n'.join(template_items)
    template = templates.rdf % (rdf_li_resources, items_rdf_about)
    rff = open(rssfeedfile, 'w')
    rff.write(template.encode('utf-8', 'ignore'))
    rff.close()
    print 'rss done'
def connect(self):
    urlparsed = requests.utils.urlparse(self.url)
    self.wc = WebDAVClient(host=urlparsed.netloc, protocol=urlparsed.scheme)
    self.wc.setbasicauth(self.login.encode('utf-8'), self.passwd.encode('utf-8'))
    time_delta = None
    local_time = datetime.datetime.utcnow()
    response = self.wc.options('/').headers.get('date')
    if response is None:
        response = self.wc.options('/').headers.get('Date')
    remote_datetime = rfc822.parsedate(response)
    self.timedelta = time.mktime(local_time.utctimetuple()) \
        - time.mktime(remote_datetime)
    self._check_notes_folder()
    return time_delta
def saveData(feed):
    santos = []
    for item in range(0, len(feed)):
        title = strip_accents(feed[item]['text'].encode('utf-8'))
        title = title.replace("\n", " ")
        # Regular Expression to get what I want
        reg_notice = re.compile('^(.*)(: http://.*)$').search(title)
        reg_jogo = re.compile('^(.*)( Siga AO VIVO por aqui!)$').search(title)
        if reg_jogo is not None:
            title = reg_jogo.groups()[0][:90]
        elif reg_notice is not None:
            title = reg_notice.groups()[0][:90]
        else:
            title = title[:90]
        reg_via = re.compile('^(.*) - Via .*$').search(title)
        if reg_via is not None:
            title = reg_via.groups()[0][:90]
        created_at = rfc822.parsedate(feed[item]['created_at'])
        if (time.mktime(created_at) >= (time.time() - (60*60*12))):
            santos.append({
                'created_at': created_at,
                'title': title
            })
    return santos
def parse_headers(self, header, meta):
    meta.title = header.get("Subject")
    if header.get("Message-Id"):
        meta.foreign_id = unicode(header.get("Message-Id"))
    if header.get("From"):
        addr = address.parse(header.get("From"))
        if addr is not None:
            meta.author = addr.to_unicode()
            meta.add_email(addr.address)
    for hdr in ["To", "CC", "BCC"]:
        if header.get(hdr):
            for addr in address.parse_list(header.get(hdr)):
                meta.add_email(addr.address)
    date = header.get("Date")
    date = rfc822.parsedate(date)
    if date is not None:
        dt = datetime.fromtimestamp(mktime(date))
        meta.add_date(dt)
    meta.headers = dict([(k, unicode(v)) for k, v in header.items()])
    return meta
def getRemainingRateLimit(self):
    ## rate_limit_status = self.api.GetRateLimitStatus()
    (ret_code, rate_limit_status) = self.makeApiCall(self.api.GetRateLimitStatus,
                                                     self.requestType)
    ## if there is an error
    if ret_code != 0:
        return [None, None]
    friend_id_limit = rate_limit_status.get('/' + self.requestType + '/ids', None)
    reset_time = friend_id_limit.get('reset', None)
    limit = friend_id_limit.get('remaining', None)
    if reset_time:
        # put the reset time into a datetime object; parsedate() returns a
        # 9-tuple, of which only the first six fields are datetime arguments
        reset = datetime.datetime(*rfc822.parsedate(reset_time)[:6])
        # find the difference in time between now and the reset time + 10 minutes
        delta = reset + datetime.timedelta(minutes=10) - datetime.datetime.utcnow()
        return [int(delta.seconds), int(limit)]
    else:
        return [5, 1]
def on_data(self, data):
    """Called when raw data is received from the connection"""
    try:
        payload = json.loads(data)
        if ('in_reply_to_status_id' in payload
                and 'retweeted_status' not in payload):
            if self.firehose is not None:
                # Disconnect the current firehose connection and kill
                # reference to the firehose worker thread
                self.firehose.disconnect_firehose(self.predicate_type)
                self.firehose = None
            # Twitter uses RFC822 dates, parse them as such
            droplet_date_pub = time.strftime('%a, %d %b %Y %H:%M:%S +0000',
                                             rfc822.parsedate(payload['created_at']))
            # Filter out non-standard Twitter RTs
            droplet_content = payload['text'].strip()
            retweet_match = re.findall('^(RT\:?)\s*', droplet_content, re.I)
            if len(retweet_match) == 0:
                screen_name, status_id = payload['user']['screen_name'], \
                    payload['id_str']
                # Permalink for the tweet
                tweet_url = "https://twitter.com/%s/statuses/%s" % \
                    (screen_name, status_id)
                links = [{'url': tweet_url, 'original_url': True}]
                drop = {
                    'channel': 'twitter',
                    'identity_orig_id': payload['user']['id_str'],
                    'in_reply_to_user_id': payload['in_reply_to_user_id_str'],
                    'identity_name': payload['user']['name'],
                    'identity_username': screen_name,
                    'identity_avatar': payload['user']['profile_image_url'],
                    'droplet_orig_id': status_id,
                    'droplet_type': 'original',
                    'droplet_title': droplet_content,
                    'droplet_content': droplet_content,
                    'droplet_raw': droplet_content,
                    'droplet_locale': payload['user']['lang'],
                    'droplet_date_pub': droplet_date_pub,
                    'links': links}
                self.drop_queue.put((time.time(), drop), False)
        elif 'delete' in payload:
            status = payload['delete']['status']
            self.on_delete(status['id_str'], status['user_id_str'])
        elif 'limit' in payload:
            track = payload['limit']['track']
            self.on_limit(track)
    except Exception:
        # The data delivered by the streaming API could not be
        # serialized into a JSON object, ignore error
        pass
def guessSentTime(self, default=None):
    """
    Try to determine the time this message claims to have been sent by
    analyzing various headers.

    @return: a L{Time} instance, or C{None}, if we don't have a guess.
    """
    try:
        sentHeader = self.getHeader(u'date')
    except equotient.NoSuchHeader:
        sentHeader = None
    else:
        try:
            return Time.fromRFC2822(sentHeader)
        except ValueError:
            pass
    for received in list(self.getHeaders(u'received'))[::-1]:
        lines = received.value.splitlines()
        if lines:
            lastLine = lines[-1]
            parts = lastLine.split('; ')
            if parts:
                date = parts[-1]
                try:
                    when = rfc822.parsedate(date)
                    if when is None:
                        continue
                except ValueError:
                    pass
                else:
                    return Time.fromStructTime(when)
    return default
def _fake_fetch(url_path, headers):
    """'Fetches' when using the fake kake-server."""
    # We late-import here since these are not always ok to import in prod
    import kake.make
    abs_filename = project_root.join(url_path[1:])
    if url_path.startswith('/genfiles'):
        try:
            file_has_changed = kake.make.build(url_path[1:])
        except kake.make.BadRequestFailure as failure:
            logging.error(failure.message)
            return (failure.message, 400, {})
        except (IOError, kake.make.CompileFailure) as why:
            logging.error('Unable to build %s: %s' % (url_path[1:], why))
            return (None, 500, {})
        logging.info('Building %s' % url_path[1:])
    else:
        if not os.path.isfile(abs_filename):
            return (None, 404, {})
        file_has_changed = True
        ims = [v for (k, v) in headers.items()
               if k.lower() == 'if-modified-since']
        if ims:
            parsed_ims = time.mktime(rfc822.parsedate(ims[0]))
            if os.path.getmtime(abs_filename) <= parsed_ims:
                file_has_changed = False
    if not file_has_changed:
        return ('', 304, {})
    with open(abs_filename) as f:
        return (f.read(), 200, {})
def collect_tweets():
    """Collect new tweets about Firefox."""
    with statsd.timer('customercare.tweets.time_elapsed'):
        auth = tweepy.OAuthHandler(settings.TWITTER_CONSUMER_KEY,
                                   settings.TWITTER_CONSUMER_SECRET,
                                   secure=True)
        auth.set_access_token(settings.TWITTER_ACCESS_TOKEN,
                              settings.TWITTER_ACCESS_TOKEN_SECRET)
        api = tweepy.API(auth, parser=RawParser())
        search_options = {
            'q': 'firefox OR #fxinput OR @firefoxbrasil OR #firefoxos',
            'rpp': settings.CC_TWEETS_PERPAGE,  # Items per page.
            'result_type': 'recent',  # Retrieve tweets by date.
        }
        # If we already have some tweets, collect nothing older than what we
        # have.
        try:
            latest_tweet = Tweet.latest()
        except Tweet.DoesNotExist:
            log.debug('No existing tweets. Retrieving %d tweets from search.' % (
                settings.CC_TWEETS_PERPAGE))
        else:
            search_options['since_id'] = latest_tweet.tweet_id
            log.info('Retrieving tweets with id >= %s' % latest_tweet.tweet_id)
        # Retrieve Tweets
        try:
            raw_data = json.loads(str(api.search(**search_options)))
        except tweepy.TweepError, e:
            log.warning('Twitter request failed: %s' % e)
            return
        if not ('results' in raw_data and raw_data['results']):
            # Twitter returned 0 results.
            return
        # Drop tweets into DB
        for item in raw_data['results']:
            # Apply filters to tweet before saving
            # Allow links in #fxinput tweets
            statsd.incr('customercare.tweet.collected')
            item = _filter_tweet(item, allow_links='#fxinput' in item['text'])
            if not item:
                continue
            created_date = datetime.utcfromtimestamp(calendar.timegm(
                rfc822.parsedate(item['created_at'])))
            item_lang = item.get('iso_language_code', 'en')
            tweet = Tweet(tweet_id=item['id'], raw_json=json.dumps(item),
                          locale=item_lang, created=created_date)
            try:
                tweet.save()
                statsd.incr('customercare.tweet.saved')
            except IntegrityError:
                pass
def date_conversion(val):
    if re.match('^\d{4}\-\d\d\-\d\d$', val):
        # 2012-12-31
        val = time.mktime(datetime.strptime(val, '%Y-%m-%d').timetuple())
    elif re.match('^\d\d?\/\d\d\/\d{4}$', val):
        # 31/12/2012
        val = time.mktime(datetime.strptime(val, '%d/%m/%Y').timetuple())
    elif re.match('^\d\d?\.\d\d\.\d{4}$', val):
        # 31.12.2012
        val = time.mktime(datetime.strptime(val, '%d.%m.%Y').timetuple())
    elif re.match('^\d{4}\d{2}\d{2}$', val):
        # 20121231
        val = time.mktime(datetime.strptime(val, '%Y%m%d').timetuple())
    elif (re.match('^(\w+\,\s+)?\d+ \w+ \d{4}\s+\d\d?\:\d\d(:\d\d)?', val)
          or re.match('^\w+,?\s+?\d+ \d{2,4}\s+\d\d?\:\d\d(:\d\d)?', val)):
        # Jan 24 2003 15:26:20 +0000
        # Mon, 20 Dec 04 08:37:31 GMT
        # Sat, 13 May 2006 06:15 +0000
        # Mon, 01 Jul 2002 18:38:25
        # Mon, 26 Sep 2005 7:35:00 -0800
        # Thu, 23 Apr 2009 13:32:15 +1200
        # 26 Aug 2009 02:07:34 +0400
        import rfc822
        import datetime as dt
        # [year, month, day, hour, min, sec]
        yyyy, mm, dd, hh, mins, ss = rfc822.parsedate(val)[:-3]
        val = time.mktime(dt.datetime(yyyy, mm, dd, hh, mins, ss).timetuple())
    try:
        val = float(str(val))
    except ValueError:
        pass
    return val
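# A few hypothetical calls to the date_conversion() helper above, for
# illustration only (the exact numeric results depend on the local timezone,
# because time.mktime() interprets the parsed tuple as local time):
#   date_conversion('2012-12-31')                       -> epoch seconds for local midnight
#   date_conversion('31/12/2012')                       -> same instant as the line above
#   date_conversion('Thu, 23 Apr 2009 13:32:15 +1200')  -> epoch seconds; the +1200 offset
#                                                          is discarded by rfc822.parsedate()
#   date_conversion('not a date')                       -> 'not a date' (returned unchanged)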
def my_tweets(request):
    if request.method == "POST":
        token = settings.TWITCN_PRIVATE_TOKEN
        api = getPrivateApi(token)
        info = "blank"
        if request.POST.get('action') == "save":
            max_id = request.POST.get('max_id', None)
            messages = api.GetUserTimeline(user='******', max_id=max_id, count=50)
            count = 0
            for message in messages:
                if message.text[0] == "@" or \
                        MyTweet.objects.filter(tweet_id=message.id).count() > 0:
                    continue
                added = datetime.datetime(*rfc822.parsedate(message.created_at)[:6])
                MyTweet.objects.create(name=message.user.screen_name,
                                       text=smart_str(message.text),
                                       tweet_id=message.id,
                                       added=added)
                count += 1
            info = "%s\n" % count
        return HttpResponse(info)
    messages = MyTweet.objects.filter(added__year=datetime.date.today().year)
    return render_to_response("webapps/favo_tweets.html",
                              {'messages': messages},
                              context_instance=RequestContext(request))
def verify_online(self):
    url = self.download_url()
    try:
        res = urllib2.urlopen(HeadRequest(url))
    except urllib2.HTTPError as e:
        return (False, e)
    if res.code != 200:
        return (False, "%d %s" % (res.code, res.msg))
    sys.stderr.writelines("HEAD %s returned %d %s\n" % (url, res.code, res.msg))
    content_type = res.headers['content-type']
    if APK_CONTENT_TYPE != content_type:
        sys.stderr.writelines("warning: content type returned by %s should be %s, not %s\n"
                              % (url, APK_CONTENT_TYPE, content_type))
    last_modified = res.headers.get('last-modified', None)
    if last_modified:
        last_modified = datetime.datetime.fromtimestamp(
            time.mktime(rfc822.parsedate(last_modified)))
        sys.stderr.writelines("last modified %s\n" % last_modified)
    size = res.headers.get('content-length', None)
    # Content-Length arrives as a string; compare it numerically.
    if size and int(size) < 4000:
        return (False, "content length of %s was less than 4k." % url)
    res.close()
    return (True, None)
def get_message(msg):
    fr = rfc822.parseaddr(msg['from'])
    to = rfc822.AddressList(msg['to']).addresslist
    cc = rfc822.AddressList(msg['cc']).addresslist
    subject = msg['subject']
    date = rfc822.parsedate(msg['date'])
    date = datetime.datetime(*date[:6])
    url = msg['Archived-At']
    if not url:
        url = msg['X-Archived-At']
    url = url.strip("<>")
    message_id = msg['Message-ID']
    in_reply_to = msg.get('In-Reply-To', None)
    return {
        "from": fr,
        "subject": subject,
        "to": to,
        "cc": cc,
        "url": url,
        "date": date,
        "message_id": message_id,
        "in_reply_to": in_reply_to,
        "raw": msg.as_string(),
    }
def BuildText(tweet):
    text = tweet['text']
    text = text.replace('\r', '').replace('\n', '<br />')
    if 'entities' in tweet:
        for info in tweet['entities']['urls']:
            text = text.replace(info['url'],
                                CreateLink(info['url'], info['expanded_url']))
        for info in tweet['entities']['hashtags']:
            text = text.replace('#' + info['text'],
                                CreateLink("//twitter.com/search/%23" + info['text'],
                                           '#' + info['text']))
        for info in tweet['entities']['user_mentions']:
            name = info['screen_name']
            text = text.replace('@' + name,
                                CreateLink("//twitter.com/" + name, '@' + name))
    text = '<div class="twitter-tweet"><span class="twitter-text">' + text + "</span>"
    created_at_str = calendar.timegm(rfc822.parsedate(tweet['created_at']))
    created_at = datetime.datetime.fromtimestamp(created_at_str)
    screen_name = tweet['user']['screen_name']
    id_str = tweet['id_str']
    text += " " + CreateLink("//twitter.com/%s/status/%s" % (screen_name, id_str),
                             created_at.strftime("%H:%M:%S"),
                             "twitter-permalink") + "</div>"
    return text
def validate(self):
    if self.rfc2822_re.match(self.value):
        import calendar
        value = parsedate(self.value)
        try:
            if value[0] > 1900:
                dow = datetime.date(*value[:3]).strftime("%a")
                if self.value.find(',') > 0 and dow.lower() != self.value[:3].lower():
                    self.log(IncorrectDOW({"parent": self.parent.name,
                                           "element": self.name,
                                           "value": self.value[:3]}))
                    return
        except ValueError as e:
            self.log(InvalidRFC2822Date({"parent": self.parent.name,
                                         "element": self.name,
                                         "value": str(e)}))
            return
        if implausible_822(self.value):
            self.log(ImplausibleDate({"parent": self.parent.name,
                                      "element": self.name,
                                      "value": self.value}))
        else:
            self.log(ValidRFC2822Date({"parent": self.parent.name,
                                       "element": self.name,
                                       "value": self.value}))
    else:
        value1, value2 = '', self.value
        value2 = re.sub(r'[\\](.)', '', value2)
        while value1 != value2:
            value1, value2 = value2, re.sub('\([^(]*?\)', ' ', value2)
        if not self.rfc822_re.match(value2.strip().lower()):
            self.log(InvalidRFC2822Date({"parent": self.parent.name,
                                         "element": self.name,
                                         "value": self.value}))
        else:
            self.log(ProblematicalRFC822Date({"parent": self.parent.name,
                                              "element": self.name,
                                              "value": self.value}))
def getPage(self, request, response):
    """TODO: Change this to access the Indexer instead of the Store"""
    path = urllib.unquote((request.getPathInfo())[1:])
    # replace the above with:
    # path = '/'.join(request.getRequestURL().split('/')[2:])
    # if the snakelet matches an arbitrary pattern
    if path == '':
        path = 'HomePage'
    a = self.getWebApp()
    ac = self.getAppContext()
    ac.indexer.registerHit(path)
    buffer = request.getHeader('If-Modified-Since')
    if buffer != None:
        since = time.mktime(rfc822.parsedate(buffer))
        try:
            # see if our page has been rendered and has a modification time
            our = ac.cache.mtime('soup:' + path)
            if (since > our):
                # Reset some headers
                response.setHeader("Cache-Control", '')
                response.setHeader("Pragma", '')
                response.setHeader("Expires", '')
                # Say bye bye
                response.setResponse(304, "Not Modified")
                return None
        except KeyError:
            pass
    # Check for any standing redirects
    redirect = self.checkRedirects(ac, path)
    if redirect:
        response.HTTPredirect(ac.base + redirect)
        return None
    # Check for a URL variant
    try:
        page = ac.store.getRevision(path)
    except IOError:
        alias = ac.indexer.resolveAlias(path, True)  # go for approximate matches
        if alias != path:
            response.HTTPredirect(ac.base + alias)
            return
        else:
            page = ac.store.getRevision("meta/EmptyPage")
            return (page.headers,
                    renderPage(ac, page, request, response, ac.indexer.done))
    if 'x-redirect' in page.headers.keys():
        uri = page.headers['x-redirect']
        (schema, netloc, path, parameters, query, fragment) = urlparse.urlparse(uri)
        if schema in self.i18n['uri_schemas'].keys():
            path = uri
        else:
            path = ac.base + path
        response.HTTPredirect(path)
        return
    return (page.headers,
            renderPage(ac, page, request, response, ac.indexer.done))
def test_1(self):
    """
    Verify the handler replies with an accurate Last-Modified header.
    """
    bogus_start_response = mock.Mock()
    # Send a request to the handler.
    self.sh({'PATH_INFO': '/static/pitz.css'}, bogus_start_response)
    assert bogus_start_response.called
    assert bogus_start_response.call_args[0][0] == '200 OK', \
        bogus_start_response.call_args[0][0]
    headers = bogus_start_response.call_args[0][1]
    # Make sure that there's a Last-Modified header.
    values = [v for (k, v) in headers if k == 'Last-Modified']
    assert len(values) == 1, headers
    last_modified_header = values[0]
    assert self.modified_time == datetime.datetime(
        *(rfc822.parsedate(last_modified_header))[:6])
def get_messages(host, user, password):
    """Download all messages (e.g. pages) from the mailbox

    Keyword arguments:
    host -- IMAP server
    user -- login connection information
    password -- password connection information

    See http://stackoverflow.com/questions/315362/properly-formatted-example-for-python-imap-email-access
    """
    M = imaplib.IMAP4_SSL(host)
    M.login(user, password)
    M.select()
    typ, data = M.search(None, 'UNSEEN')
    messages = []
    for num in data[0].split():
        typ, data = M.fetch(num, '(RFC822)')
        file = StringIO.StringIO(data[0][1])
        message = rfc822.Message(file)
        msgDate = time.mktime(rfc822.parsedate(message['date']))
        messages.append(create_event('IMAP', message['from'], message['to'],
                                     message['subject'], int(msgDate)))
    M.close()
    M.logout()
    log.info('Found %d unseen messages at %s', len(messages), host)
    return messages
def BuildPost(timeline, day):
    if not isinstance(timeline, list):
        return None
    tweets = []
    for elem in timeline:
        # determine elem is tweet or not
        if 'user' not in elem or 'created_at' not in elem or 'text' not in elem:
            continue
        # maybe tweet
        created_at_str = calendar.timegm(rfc822.parsedate(elem['created_at']))
        created_at = datetime.datetime.fromtimestamp(created_at_str)
        # filter tweet at yesterday
        if day.year != created_at.year or day.month != created_at.month or day.day != created_at.day:
            continue
        text = BuildText(elem)
        tweets.append(text)
    post = "".join(reversed(tweets))
    #post = "<ul class=\"twitter-log\">\n " + post + "\n</ul>";
    return post
def has_modified_header(headers):
    """Check request header for 'if-modified-since'.

    Return True if content wasn't modified (according to the timestamp)."""
    global DATA_LAST_MODIFIED
    modified = headers.get('if-modified-since')
    if modified:
        oldLastModified = DATA_LAST_MODIFIED
        try:
            mtime = path.getmtime(TMPFILE_DATA_TIMESTAMP)
        except OSError:
            with open(TMPFILE_DATA_TIMESTAMP, 'a'):
                utime(TMPFILE_DATA_TIMESTAMP, None)
            mtime = time()
        DATA_LAST_MODIFIED = email.utils.formatdate(mtime, usegmt=True)
        if DATA_LAST_MODIFIED != oldLastModified:
            # reload attributes if index changed
            get_attributes_values('ind_name_exact', CHECK_ATTR_FILTER)
        # pprint([headers, modified, DATA_LAST_MODIFIED, mtime])
        # pprint([mtime, rfc822.parsedate(modified), mktime(rfc822.parsedate(modified))])
        modified_file = datetime.fromtimestamp(mtime)
        modified_file = modified_file.replace(microsecond=0)
        modified_date = datetime.fromtimestamp(mktime(rfc822.parsedate(modified)))
        # pprint([
        #     'Data: ', modified_file,
        #     'Header: ', modified_date,
        #     modified_file <= modified_date,
        # ])
        if modified_file <= modified_date:
            return True
    return False
def parse_headers(self, header, meta):
    meta.title = header.get('Subject')
    if header.get('Message-Id'):
        meta.foreign_id = string_value(header.get('Message-Id'))
    if header.get('From'):
        addr = address.parse(header.get('From'))
        if addr is not None:
            meta.author = addr.to_unicode()
            meta.add_email(addr.address)
    for hdr in ['To', 'CC', 'BCC']:
        if header.get(hdr):
            for addr in address.parse_list(header.get(hdr)):
                meta.add_email(addr.address)
    date = header.get('Date')
    date = rfc822.parsedate(date)
    if date is not None:
        dt = datetime.fromtimestamp(mktime(date))
        meta.add_date(dt)
    meta.headers = dict([(k, string_value(v)) for k, v in header.items()])
    return meta
def object_app(environ, start_response):
    path = os.path.join(DATAROOT, environ['PATH_INFO'][1:])
    f = open(path, 'rb')
    stat = os.fstat(f.fileno())
    expire = datetime.utcnow() + timedelta(days=365)
    expirestr = expire.strftime('%a, %d %b %Y %H:%M:%S GMT')
    etag = '"' + str(stat.st_mtime) + "_" + str(stat.st_size) + '"'
    headers = [('Content-Type', guess_mime_type(path)),
               ('Content-Length', str(stat.st_size)),
               ('Last-Modified', rfc822.formatdate(stat.st_mtime)),
               ('Expires', expirestr),
               ('ETag', etag)]
    for key, value in diamond_textattr(path):
        # we probably should filter out invalid characters for HTTP headers
        key = 'x-attr-' + key
        headers.append((key, value))
    if_modified = environ.get('HTTP_IF_MODIFIED_SINCE')
    if_none = environ.get('HTTP_IF_NONE_MATCH')
    # parsedate() returns a time tuple, not an epoch value; convert the
    # header to seconds before comparing it against st_mtime.
    if_modified_ts = None
    if if_modified:
        parsed = rfc822.parsedate_tz(if_modified)
        if parsed is not None:
            if_modified_ts = rfc822.mktime_tz(parsed)
    if (if_modified_ts is not None and if_modified_ts >= stat.st_mtime) or \
            (if_none and (if_none == '*' or etag in if_none)):
        start_response("304 Not Modified", headers)
        return [""]
    start_response("200 OK", headers)
    # wrap the file object in an iterator that reads the file in 64KB blocks
    # instead of line-by-line.
    return environ['wsgi.file_wrapper'](f, 65536)
def serve_static(self, fs_path, ims):
    """Given a filesystem path to a static resource, serve it.

    This is factored out for easier reuse.
    """
    # Get basic info from the filesystem and start building a response.
    # =================================================================
    mtime = os.stat(fs_path)[stat.ST_MTIME]
    content_type = mimetypes.guess_type(fs_path)[0] or 'text/plain'
    response = Response(200)
    # Support 304s, but only in deployment mode.
    # ==========================================
    if self.deploy_mode:
        if ims:
            mod_since = rfc822.parsedate(ims)
            last_modified = time.gmtime(mtime)
            if last_modified[:6] <= mod_since[:6]:
                response.code = 304
    # Finish building the response and raise it.
    # ==========================================
    response.headers['Last-Modified'] = rfc822.formatdate(mtime)
    response.headers['Content-Type'] = content_type
    if response.code != 304:
        response.body = file(fs_path, 'rb').read()
    raise response
def CreatedAtInSeconds(self):
    """Get the time this direct message was posted, in seconds since the epoch.

    Returns:
      The time this direct message was posted, in seconds since the epoch.
    """
    return timegm(parsedate(self.created_at))
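# A minimal standalone sketch of the timegm(parsedate(...)) conversion used in
# the method above and in several other snippets in this collection; the sample
# timestamp and variable names below are illustrative only, not taken from any
# of these projects.
from calendar import timegm
from rfc822 import parsedate   # Python 2; email.utils.parsedate on Python 3

created_at = 'Wed, 01 Jan 2014 00:00:00 +0000'  # sample Twitter-style RFC 822 date
parsed = parsedate(created_at)                  # 9-tuple: (2014, 1, 1, 0, 0, 0, 0, 1, -1)
seconds = timegm(parsed)                        # 1388534400; timegm() treats the tuple as UTC
# Note: time.mktime(parsed), used by other snippets here, would instead treat
# the tuple as local time; parsedate() itself discards any timezone offset.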
def import_buffer(tablename, buffername):
    table = new.load_table(tablename)
    newtable = new.convert_table(table)
    buf = self.session.get_buffer_by_name(buffername)
    buf.storage.extend(newtable)
    buf.storage.sort(key=lambda a: calendar.timegm(rfc822.parsedate(a['created_at'])))
    buf.storage = misc.RemoveDuplicates(buf.storage, lambda x: x['id'])
def getLastModified(self):
    """
    Return last modification of a resource as time tuple or C{None}.

    @rtype: C{time.struct_time}
    """
    datetimeString = None
    xml = self.properties.get(Constants.PROP_LAST_MODIFIED)
    if xml:
        datetimeString = xml.textof()
    if datetimeString:
        try:
            result = rfc822.parsedate(datetimeString)
            if result is None:
                # Some servers like Tamino use ISO 8601
                result = _parseIso8601String(datetimeString)
            return time.struct_time(result)
        except ValueError:
            self._logger.debug(
                "Invalid date format: "
                "The server must provide a RFC822 or ISO8601 formatted date string.",
                exc_info=True)
def _date(self, datefield):
    t = rfc822.parsedate(datefield)
    if t is None:
        print 'Warning! Could not parse ' + str(datefield) + '. Skipping.'
        return None
    return datetime.datetime.fromtimestamp(time.mktime(t))
def update_cache(self, key, value, headers):
    cache = True
    expires = headers.get('Expires', None)
    if expires:
        expires = mktime(parsedate(expires))
    else:
        expires = int(time()) + 3600
    cache_control = headers.get('Cache-Control', '')
    for control in cache_control.split(','):
        control = control.strip(' ')
        control = control.split('=')
        if len(control) == 2:
            k, v = control
        else:
            # directives without a value, e.g. 'no-cache'
            k = control[0]
            v = None
        if k in ('private', 'no-cache', 'no-store', 'must-revalidate'):
            cache = False
        if k in ('max-age', 'min-fresh'):
            try:
                expires = int(time()) + int(v)
            except ValueError:
                pass
    if cache:
        self.cache[key] = (expires, value)
def download_message(mail, data):
    msg = email.message_from_string(data)
    logging.debug("Message-ID is %s" % msg["Message-ID"])
    sender_name, sender_addr = decode_sender(msg.get_all("From")[0])
    sender_phone = get_phone_number(msg)
    date = rfc822.parsedate(msg["Date"])
    status = False
    for part in msg.walk():
        if part.get_content_maintype() == "multipart":
            continue
        if part.get("Content-Disposition") is None:
            continue
        name = decode_file_name(part.get_filename())
        if name is None:
            logging.debug("Message part has no name, skipped.")
            continue
        data = part.get_payload(decode=True)
        if process_file(name, data, sender_name, sender_addr, sender_phone, date):
            status = True
    return status
def parse_headers(self, msg, meta):
    meta.title = msg.subject
    if msg.headers.get('Message-Id'):
        meta.foreign_id = unicode(msg.headers.get('Message-Id'))
    if msg.headers.get('From'):
        addr = address.parse(msg.headers.get('From'))
        if addr is not None:
            meta.author = addr.to_unicode()
    for hdr in ['To', 'CC', 'BCC']:
        if msg.headers.get(hdr):
            for addr in address.parse_list(msg.headers.get(hdr)):
                meta.add_recipient(addr.to_unicode())
    date = msg.headers.get('Date')
    date = rfc822.parsedate(date)
    if date is not None:
        dt = datetime.fromtimestamp(mktime(date))
        meta.add_date(dt)
    meta.headers = dict([(k, unicode(v)) for k, v in msg.headers.items()])
    return meta
def load_tweets(**kwargs):
    args = dict(count=20, q=Search_key)
    args.update(**kwargs)
    url = 'https://api.twitter.com/1.1/search/tweets.json?' + urlencode(args)
    user_timeline = TweetOuth.tweet_req(url)
    tweets = json.loads(user_timeline.decode('utf-8'))
    if type(tweets) == dict and u'errors' in tweets:
        if repeat and tweets[u'errors'][0]["code"] == 88:
            print(tweets[u'errors'][0]["message"], file=sys.stderr)
            time.sleep(1000)
            return load_tweets(**kwargs)
        if tweets[u'errors'][0]["code"] in (32, 89, 99):
            raise InvalidTokenError(tweets[u'errors'][0]['message'])
        if tweets[u'errors'][0]["code"] == 88:
            raise OverflowError(tweets[u'errors'][0]['message'])
        raise Exception(tweets[u'errors'][0]['message'])
    for twit in tweets[u'statuses']:
        c.execute('INSERT INTO tweet (user, tweet_id, created, text, source, screan_name, description) '
                  'VALUES (?, ?, ?, ?, ?, ?, ?)',
                  (twit[u'user'][u'name'], twit['id'],
                   time.mktime(rfc822.parsedate(twit['created_at'])),
                   twit['text'], twit['source'],
                   twit[u'user'][u'screen_name'], twit[u'user'][u'description']))
    c.commit()
    return len(tweets[u'statuses'])
def holvagytok(cookie):
    cookietoken = CookieToken.all().filter('cookie = ', cookie).get()
    credentials = foursquare.OAuthCredentials(oauth_key, oauth_secret)
    user_token = oauth.OAuthToken(cookietoken.token, cookietoken.secret)
    credentials.set_access_token(user_token)
    fs = foursquare.Foursquare(credentials)
    fscheckins = fs.checkins()['checkins']
    venyuz = []
    for checkin in fscheckins:
        if 'venue' in checkin:
            venue = checkin['venue']
            user = checkin['user']
            # if the venue already exists in venyuz, only refresh the date and
            # the people who are there
            # not pretty! yuck! fixme!
            venyunevek = [x['name'] for x in venyuz]
            checkintimetuple = date_diff(datetime.fromtimestamp(
                time.mktime(parsedate(checkin['created'])))).decode("utf-8")
            if venue['name'] in venyunevek:
                ezittmost = venyuz[venyunevek.index(venue['name'])]
                #logging.error(ezittmost)
                ezittmost['here'].append(user)
                if checkintimetuple > ezittmost['lastseen']:
                    ezittmost['lastseen'] = checkintimetuple
            else:
                # if it is not there yet, add it
                if 'geolat' in venue:
                    venyuz.append({
                        'name': venue['name'],
                        'geolat': venue['geolat'],
                        'geolong': venue['geolong'],
                        'lastseen': checkintimetuple,
                        'here': [user]
                    })
                else:
                    pass
    return venyuz
def touch(self, filename):
    """Set last modified date on filename."""
    if self.lastModified:
        tt = rfc822.parsedate(self.lastModified)
        if tt:
            mtime = time.mktime(tt)
            os.utime(filename, (mtime, mtime))
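# A hypothetical usage sketch for a touch()-style helper like the one above:
# take the Last-Modified header of a downloaded resource and stamp it onto the
# local file. The header value and file name are made up for illustration.
import os
import time
import rfc822   # Python 2; email.utils provides parsedate() on Python 3

last_modified = 'Sat, 13 May 2006 06:15:00 GMT'   # e.g. response.headers['Last-Modified']
tt = rfc822.parsedate(last_modified)
if tt:
    mtime = time.mktime(tt)                        # interpreted as local time, as in the method above
    os.utime('downloaded.bin', (mtime, mtime))     # set both access and modification times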
def GetCreatedAtInSeconds(self):
    '''Get the time this status message was posted, in seconds since the epoch.

    Returns:
      The time this status message was posted, in seconds since the epoch.
    '''
    return timegm(rfc822.parsedate(self.created_at))
def seconds_since_epoch_of_checkin(c):
    import rfc822
    try:
        checkin_ts = time.mktime(rfc822.parsedate(c['created']))
    except Exception, e:
        logging.error("Unable to parse date of checkin %s: %s" % (`c`, e))
        raise FourMapperException(500, 'Unable to parse date in checkin')
    return checkin_ts
def collect_tweets():
    """Collect new tweets about Firefox."""
    # Don't (ab)use the twitter API from dev and stage.
    if settings.STAGE:
        return
    with statsd.timer('customercare.tweets.time_elapsed'):
        t = Twython(settings.TWITTER_CONSUMER_KEY,
                    settings.TWITTER_CONSUMER_SECRET,
                    settings.TWITTER_ACCESS_TOKEN,
                    settings.TWITTER_ACCESS_TOKEN_SECRET)
        search_options = {
            'q': ('firefox OR #fxinput OR @firefoxbrasil OR #firefoxos '
                  'OR @firefox_es'),
            'count': settings.CC_TWEETS_PERPAGE,  # Items per page.
            'result_type': 'recent',  # Retrieve tweets by date.
        }
        # If we already have some tweets, collect nothing older than what we
        # have.
        try:
            latest_tweet = Tweet.latest()
        except Tweet.DoesNotExist:
            log.debug('No existing tweets. Retrieving %d tweets from search.' %
                      settings.CC_TWEETS_PERPAGE)
        else:
            search_options['since_id'] = latest_tweet.tweet_id
            log.info('Retrieving tweets with id >= %s' % latest_tweet.tweet_id)
        # Retrieve Tweets
        results = t.search(**search_options)
        if len(results['statuses']) == 0:
            # Twitter returned 0 results.
            return
        # Drop tweets into DB
        for item in results['statuses']:
            # Apply filters to tweet before saving
            # Allow links in #fxinput tweets
            statsd.incr('customercare.tweet.collected')
            item = _filter_tweet(item, allow_links='#fxinput' in item['text'])
            if not item:
                continue
            created_date = datetime.utcfromtimestamp(calendar.timegm(
                rfc822.parsedate(item['created_at'])))
            item_lang = item['metadata'].get('iso_language_code', 'en')
            tweet = Tweet(tweet_id=item['id'], raw_json=json.dumps(item),
                          locale=item_lang, created=created_date)
            try:
                tweet.save()
                statsd.incr('customercare.tweet.saved')
            except IntegrityError:
                pass
def dump_tweets(q, since_id=0, verbose=True, rpp=100, result_type='mixed',
                db_cursor=False, db_table=False):
    base_url = "http://search.twitter.com/search.json"
    query = "?" + urllib.urlencode({'q': q,
                                    'since_id': since_id,
                                    'rpp': rpp,
                                    'result_type': result_type,
                                    'page': 1,
                                    'include_entities': 1})
    max_id = counter = 0
    for c in range(1, 15):
        url = base_url + query
        if verbose:
            print >> sys.stderr, url
        raw_response = urllib2.urlopen(url)
        json_response = json.load(raw_response)
        max_id = json_response["max_id"]
        raw_response.close()
        all_tweets = json_response["results"]
        counter = counter + len(all_tweets)
        for tweet in all_tweets:
            print tweet
            id = tweet["id"]
            timestamp = calendar.timegm(rfc822.parsedate(tweet["created_at"]))
            from_user = clean_string(tweet["from_user"])
            ##from_user_id = clean_string(tweet["from_user_id"])
            text = clean_string(tweet["text"])
            ##iso_language_code = ""
            iso_language_code = tweet["iso_language_code"]
            ## "geo":{"coordinates":[48.748530,2.448800],"type":"Point"}
            if tweet["geo"]:
                geo_lat = tweet["geo"]["coordinates"][0]
                geo_long = tweet["geo"]["coordinates"][1]
            else:
                geo_lat = 0.0
                geo_long = 0.0
            row = str(id) + " : " + str(timestamp) + " : " + from_user + " : " + text + " : " + iso_language_code
            print row.encode('utf8')
            if db_cursor != False:
                sql_statement = u"""insert into %s (id, from_user, timestamp, text, iso_language_code, geo_lat, geo_long) values (%d, '%s', %d, '%s', '%s', %f, %f)""" % (
                    db_table, id, from_user, timestamp, text.replace("'", "\\'"),
                    iso_language_code, geo_lat, geo_long)
                ##print >> sys.stderr, sql_statement
                try:
                    db_cursor.execute(sql_statement.encode('utf8'))
                    db_cursor.connection.commit()
                except MySQLdb.Error, e:
                    print >> sys.stderr, "Error %d: %s" % (e.args[0], e.args[1])
                    print >> sys.stderr, "Skipping inserting this tweet to the DB"
        ##print json_response["next_page"]
        if "next_page" in json_response.keys():
            query = json_response["next_page"]
        else:
            break
def check_modified_since(self, cache):
    modified_since_str = self.request.META.get("HTTP_IF_MODIFIED_SINCE", None)
    if modified_since_str:
        modified_since = time.mktime(parsedate(modified_since_str))
        file_time = time.mktime(cache.original_file_time())
        if modified_since >= file_time:
            return HttpResponseNotModified()
def created_at_in_seconds(self):
    """
    Get the time this status message was posted, in seconds since
    the epoch (1 Jan 1970).

    Returns:
        int: The time this status message was posted, in seconds since
        the epoch.
    """
    return timegm(parsedate(self.created_at))
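# All of the snippets above rely on the Python 2 rfc822 module, which was
# removed in Python 3. A hedged migration sketch, assuming only the date
# helpers used in this collection are needed: email.utils (available in both
# Python 2 and 3) provides equivalent functions. The sample value below is
# illustrative only.
from calendar import timegm
from email.utils import parsedate, parsedate_tz, mktime_tz, formatdate

sample = 'Mon, 26 Sep 2005 07:35:00 -0800'   # e.g. a 'Date' header or Twitter 'created_at'
naive = timegm(parsedate(sample))            # timezone offset dropped, tuple read as UTC
aware = mktime_tz(parsedate_tz(sample))      # -0800 offset applied; differs from naive by 8 hours
header = formatdate(aware, usegmt=True)      # back to an RFC 2822 / RFC 822 string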