def schedularRunner():
    """Fetch the Ada Derana hot-news feed and store the items not yet saved.

    The feed items are walked in the order the aggregator returns them and
    collected until one whose publish date equals the value returned by
    ``dao.selectLast('derananews')`` is met.  The collected items are then
    pre-processed, classified and inserted through ``dao.insertNews``.
    Items with an empty summary are not inserted.
    """
    # NOTE(review): presumably the publish date of the newest stored item;
    # it is only ever compared against news.publishDate -- confirm.
    latestNews = dao.selectLast('derananews')

    print(latestNews)

    aggrigater = DeranaNewsAggregator()
    classifier = MultinomialNBClassifier()

    feedItems = aggrigater.aggriagteNews("http://sinhala.adaderana.lk/rsshotnews.php")

    # Start from an empty list.  The previous code used ``[News]``, which
    # put the News class itself in the list as its first "item", so the
    # class object was handed to the preprocessor, the classifier and the
    # insert loop below.
    latestNewsList = []

    for news in feedItems:
        # Compare parsed dates rather than raw strings so that formatting
        # differences do not matter.
        if rfc822.parsedate_tz(news.publishDate) == rfc822.parsedate_tz(latestNews):
            break
        latestNewsList.append(news)

    preprocessor.prepocessor(latestNewsList)

    classifier.classify(latestNewsList)

    for news in latestNewsList:

        title = news.title
        newsSite = str(news.newsSite)
        category = news.category[0]
        link = news.link
        pubDate = str(news.publishDate)
        description = news.summary
        imgLink = news.imageLink
        # Items without a summary are skipped.
        if description != '':
            dao.insertNews(title, link, description, imgLink, pubDate, category, newsSite)
Ejemplo n.º 2
0
def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Note that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:

    no-cache
    only-if-cached
    max-age
    min-fresh

    Returns one of the strings "FRESH", "STALE" or "TRANSPARENT".

    Side effect: when the request carries ``Pragma: no-cache`` and no
    ``cache-control`` header, ``cache-control: no-cache`` is added to
    ``request_headers``.

    Malformed values never make an entry fresh: an unparseable Date
    leaves the result "STALE", an unparseable Expires or a non-numeric
    max-age counts as a freshness lifetime of 0, and a non-numeric
    min-fresh is ignored.
    """

    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    if 'pragma' in request_headers and request_headers['pragma'].lower().find('no-cache') != -1:
        retval = "TRANSPARENT"
        if 'cache-control' not in request_headers:
            request_headers['cache-control'] = 'no-cache'
    elif 'no-cache' in cc:
        retval = "TRANSPARENT"
    elif 'no-cache' in cc_response:
        retval = "STALE"
    elif 'only-if-cached' in cc:
        retval = "FRESH"
    elif 'date' in response_headers:
        parsed_date = rfc822.parsedate_tz(response_headers['date'])
        if parsed_date is None:
            # Without a usable Date the age cannot be computed.
            return retval
        date = calendar.timegm(parsed_date)
        now = time.time()
        current_age = max(0, now - date)
        if 'max-age' in cc_response:
            try:
                freshness_lifetime = int(cc_response['max-age'])
            except ValueError:
                freshness_lifetime = 0
        elif 'expires' in response_headers:
            expires = rfc822.parsedate_tz(response_headers['expires'])
            if expires is None:
                # Values such as "0" or "-1" are commonly sent to mean
                # "already expired".
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        if 'max-age' in cc:
            try:
                request_max_age = int(cc['max-age'])
            except ValueError:
                request_max_age = 0
            freshness_lifetime = min(freshness_lifetime, request_max_age)
        if 'min-fresh' in cc:
            try:
                current_age += int(cc['min-fresh'])
            except ValueError:
                pass
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval
Ejemplo n.º 3
0
def get_delivery_time (msg):
    """Return the delivery time of msg as seconds since the epoch, or None.

    The "Delivery-date" header is used when present; otherwise the
    envelope "From " line (``msg.unixfrom``) is parsed.  When neither
    yields a usable date a warning is emitted (for a value that is
    present but unparseable) and None is returned.
    """
    dtime = None
    if "Delivery-date" in msg:
        # eg. "Thu, 12 Jul 2001 08:47:20 -0400" to 994942040 (seconds
        # since epoch in UTC)
        parsed = parsedate_tz(msg["Delivery-date"])
        if parsed is None:
            # parsedate_tz returns None for garbage; passing that on to
            # mktime_tz would raise TypeError.
            warn("warning: could not parse \"Delivery-date\" header: %s"
                 % msg["Delivery-date"])
        else:
            dtime = mktime_tz(parsed)
    elif msg.unixfrom:
        # Parse eg.
        #   "From [email protected] Thu Jul 12 08:47:20 2001"
        # -- this is the "From " line format used by Exim; hopefully other
        # MTAs do the same!
        m = re.match(r'^From (\S+) +(\w{3} \w{3}\s+\d\d? \d\d:\d\d:\d\d \d{4})$',
                     msg.unixfrom)
        if not m:
            warn("warning: could not parse \"From \" line: %s" % msg.unixfrom)
        else:
            dtime_str = m.group(2)
            # Eg. "Thu Jul 12 08:47:20 2001" -> 994945640 -- note that
            # this might be different from what we get parsing the same
            # date string above, because this one doesn't include the
            # timezone.  Sigh.
            dtime = mktime(strptime(dtime_str, "%c"))

            # Attempt to detect and correct for DST differences.
            # (This works if we parsed a summer time during the winter;
            # what about the inverse?)
            dtime_str_curtz = ctime(dtime)
            if dtime_str_curtz != dtime_str:
                dtime_curtz = mktime(strptime(dtime_str_curtz, "%c"))
                diff = dtime_curtz - dtime
                dtime -= diff

    return dtime
Ejemplo n.º 4
0
    def execute(self, observation):
        """Store observation in DynamoDB unless an item with its key exists.

        Adds 'ObservationTime' (seconds since the epoch, UTC), 'StationId'
        and 'CompositeKey' ("<station_id>_<ObservationTime>") to the
        observation dict, then looks the key up in the
        'VocalPelicanObservation' table.

        Returns the result of ``put_item`` when the item was not found,
        and False when it already exists or the lookup failed with a
        JSONResponseError.  Raises KeyError when one of the AWS_*
        environment variables is missing.
        """
        station_id = observation['station_id']

        raw_time = observation['observation_time_rfc822']
        # mktime_tz already returns seconds since the epoch in UTC.  The
        # previous code converted it to a *local* datetime and subtracted
        # a *UTC* epoch, which shifted the value by the host's UTC offset.
        delta = int(rfc822.mktime_tz(rfc822.parsedate_tz(raw_time)))

        observation['ObservationTime'] = delta
        observation['StationId'] = station_id

        composite_key = "%s_%d" % (station_id, delta)
        observation['CompositeKey'] = composite_key

        region = os.environ['AWS_DEFAULT_REGION']
        accessKey = os.environ['AWS_ACCESS_KEY']
        secretKey = os.environ['AWS_SECRET_KEY']

        try:
            connx = boto.dynamodb2.connect_to_region(
                region,
                aws_access_key_id=accessKey,
                aws_secret_access_key=secretKey)
            obs_table = Table('VocalPelicanObservation', connection=connx)
            # Only the existence check matters; the row itself is unused.
            obs_table.get_item(CompositeKey=composite_key)
        except JSONResponseError as responseError:
            # authentication problem
            print(responseError)
        except boto.dynamodb2.exceptions.ItemNotFound:
            # not found implies safe to add
            return obs_table.put_item(observation)

        return False
Ejemplo n.º 5
0
 def parse_term(term):
     """Convert a term dict into an rdflib URIRef, Literal or BNode.

     ``term['type']`` selects the result: 'IRI' gives a URIRef,
     'literal' gives a Literal, anything else is treated as a blank node
     whose label (the part after the first ':' of the value) is mapped to
     a stable generated id through ``bid_map``.

     For xsd:dateTime literals the value is converted to a datetime,
     trying in order: a numeric UNIX timestamp, an ISO 8601 string, an
     RFC 822 date string.  Note that ``term['value']`` is modified in
     place by these conversions.
     """
     if term['type'] == 'IRI':
         return URIRef(term['value'])
     elif term['type'] == 'literal':
         # Only build a URIRef when a datatype is actually present;
         # URIRef(None) is not a meaningful datatype.
         datatype = term.get('datatype')
         if datatype is not None:
             datatype = URIRef(datatype)
         if datatype == XSD.dateTime:
             try:
                 term['value'] = float(term['value'])
                 term['value'] = datetime.utcfromtimestamp(term['value'])
             except Exception:
                 try:
                     term['value'] = isodate.parse_datetime(term['value'])
                 except Exception:
                     timestamp = mktime_tz(parsedate_tz(term['value']))
                     term['value'] = datetime.fromtimestamp(timestamp)
         if datatype == RDFS.Literal:
             # rdfs:Literal is emitted as an untyped literal; numeric
             # looking values become floats.
             datatype = None
             try:
                 term['value'] = float(term['value'])
             except Exception:
                 pass
         return Literal(term['value'], datatype=datatype)
     else:
         bid = term['value'].split(':')[1]
         if bid not in bid_map:
             bid_map[bid] = shortuuid.uuid()
         return BNode(bid_map[bid])
Ejemplo n.º 6
0
Archivo: key.py Proyecto: gefilte/boto
    def get_contents_to_filename(self, filename, headers=None,
                                 cb=None, num_cb=10,
                                 torrent=False,
                                 version_id=None,
                                 res_download_handler=None,
                                 response_headers=None):
        """
        Retrieve an object from S3 using the name of the Key object as the
        key in S3.  Store contents of the object to a file named by 'filename'.
        See get_contents_to_file method for details about the
        parameters.

        The file is always closed, even when the download raises.  After a
        successful download the file's access and modification times are
        set from ``self.last_modified`` on a best-effort basis.

        :type filename: string
        :param filename: The filename of where to put the file contents

        :type headers: dict
        :param headers: Any additional headers to send in the request

        :type cb: function
        :param cb: a callback function that will be called to report
                   progress on the download.  The callback should accept
                   two integer parameters, the first representing the
                   number of bytes that have been successfully
                   transmitted and the second representing the
                   size of the object being transmitted.

        :type num_cb: int
        :param num_cb: (optional) If a callback is specified with
                       the cb parameter this parameter determines the
                       granularity of the callback by defining
                       the maximum number of times the callback will
                       be called during the file transfer.

        :type torrent: bool
        :param torrent: If True, returns the contents of a torrent file
                        as a string.

        :type version_id: string
        :param version_id: Passed through to get_contents_to_file.

        :type res_download_handler: ResumableDownloadHandler
        :param res_download_handler: If provided, this handler will
                                     perform the download.

        :type response_headers: dict
        :param response_headers: A dictionary containing HTTP headers/values
                                 that will override any headers associated with
                                 the stored object in the response.
                                 See http://goo.gl/EWOPb for details.
        """
        fp = open(filename, 'wb')
        try:
            self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent,
                                      version_id=version_id,
                                      res_download_handler=res_download_handler,
                                      response_headers=response_headers)
        finally:
            # Do not leak the handle when the download fails.
            fp.close()
        # if last_modified date was sent from s3, try to set file's timestamp
        if self.last_modified is not None:
            try:
                modified_tuple = rfc822.parsedate_tz(self.last_modified)
                modified_stamp = int(rfc822.mktime_tz(modified_tuple))
                os.utime(fp.name, (modified_stamp, modified_stamp))
            except Exception:
                # Deliberately best-effort: a bad date or a failing utime
                # must not turn a completed download into an error.
                pass
Ejemplo n.º 7
0
def process_reply_file(current, fname):
    """Append the mail message stored in fname as a note on current.

    The note is a dict with 'text' (the From header, a newline, then the
    message body) and 'timestamp' (the Date header as parsed by
    ``rfc822.parsedate_tz``, i.e. a 10-tuple or None).  It is appended to
    ``current["notes"]``.
    """
    reply = open(fname, "r")
    try:
        msg = rfc822.Message(reply)
        new_note = {}
        # msg.fp is positioned just after the headers, so read() yields
        # the body only.
        new_note['text'] = "%s\n%s" % (msg['From'], msg.fp.read())
        new_note['timestamp'] = rfc822.parsedate_tz(msg['Date'])
    finally:
        # The file was previously left open.
        reply.close()
    current["notes"].append(new_note)
Ejemplo n.º 8
0
def createEvent(evType, tstamp, name = None, contextList = None,
                entityList = None):
    """
    Create an XML element representing an event. Returns the XML object

    It expects:
    evType: Enum
    tstamp: datetime object (None means "now")
    name : string
    contextList: List of context elements (None means no elements)
    entityList: List of entity elements (None means no elements)

    The element gets the attributes 'type', 'datetime', optionally
    'name', and an 'id' that is the SHA-1 hex digest of the serialized
    element.  Entity elements are appended before context elements.
    """
    # None instead of [] as default avoids sharing one list object
    # between calls.
    if contextList is None:
        contextList = []
    if entityList is None:
        entityList = []

    result = etree.Element('event')

    result.attrib['type'] = eventName(evType)
    if tstamp is None:
        tstamp = datetime.datetime.now()
    # The timestamp is formatted without a zone, re-parsed and converted
    # to an epoch value, then rendered by formatdate.
    result.attrib['datetime'] = rfc822.formatdate(rfc822.mktime_tz(rfc822.parsedate_tz(tstamp.strftime("%a, %d %b %Y %H:%M:%S"))))
    if name is not None:
        result.attrib['name'] = name

    for el in entityList + contextList:
        result.append(el)

    # Create the ID
    m = hashlib.sha1()
    m.update(etree.tostring(result))
    result.attrib['id'] = m.hexdigest()

    return result
Ejemplo n.º 9
0
    def post(self):
        """Handle a mesh-notify POST and hand the entry to the proxy backend.

        The request body is JSON with the keys "url", "metadata",
        "proxy_ip" and "proxy_port".  The first Last-Modified header found
        in ``metadata["headers"]`` (if any) is converted to an epoch
        timestamp.  When no backend is available the request is answered
        immediately; otherwise answering is delegated to the backend via
        the ``our_finish`` callback.
        """
        def our_finish():
            # Reply with a fixed JSON acknowledgement and end the request.
            self.add_header("Content-type", "application/json")
            self.write(json.dumps({"result": "ok"}))
            self.finish()

        backend = get_proxy_backend()
        if backend is None:
            our_finish()
            return

        obj = json.loads(self.request.body)

        url = obj["url"]
        metadata = obj["metadata"]
        proxy_ip = obj["proxy_ip"]
        proxy_port = obj["proxy_port"]

        print("GOT MESH-NOTIFY from %s:%d %s" % (proxy_ip, proxy_port, url))

        # Only the first matching header is looked at.
        last_modified = next(
            (rfc822.mktime_tz(rfc822.parsedate_tz(val))
             for key, val in metadata["headers"]
             if key.lower() == "last-modified"),
            None)

        entry = {"url": url, "last_modified": last_modified}

        tracker = LimitTracker(NOTIFY_SIMULTANEOUS_DOWNLOADS)
        backend.download_entries(proxy_ip, proxy_port, [entry], our_finish, tracker)
Ejemplo n.º 10
0
	def matches_value( self, v ):
		"""Return True when the date string v lies more than self.age seconds in the past.

		v is parsed as an RFC 822 style date; the comparison is made
		against the current time.
		"""
		now = time.time()
		stamp = rfc822.mktime_tz( rfc822.parsedate_tz( v ) )
		elapsed = now - stamp
		return elapsed > self.age
Ejemplo n.º 11
0
 def _onsuccess(response):
     if response.status == 200:
         checksum = response.headers['Etag'].strip('"')
         last_modified = response.headers['Last-Modified']
         modified_tuple = rfc822.parsedate_tz(last_modified)
         modified_stamp = int(rfc822.mktime_tz(modified_tuple))
         return {'checksum': checksum, 'last_modified': modified_stamp}
Ejemplo n.º 12
0
    def open(self):
        """Start a streaming GET of the 'data' URL and return the raw stream.

        Stores the response on ``self._data_response`` and records
        ``self._size`` (Content-Length as int, or None),
        ``self._modified`` (Last-Modified as an epoch timestamp, or None)
        and ``self._mimetype`` (Content-Type, defaulting to
        'application/octet-stream').
        """
        # XXX in future add support for compression
        request_headers = {'Accept-Encoding': ''}

        # The keyword that defers the body download depends on the
        # detected requests version.
        if _requests_version == '0':
            deferred = {'prefetch': False}
        else:
            deferred = {'stream': True}
        self._data_response = self._session.get(self._url('data'),
                                                headers=request_headers,
                                                **deferred)
        self._validate_response(self._data_response)

        raw_size = self._data_response.headers.get('Content-Length', None)
        self._size = None if raw_size is None else int(raw_size)

        raw_modified = self._data_response.headers.get('Last-Modified', None)
        if raw_modified is None:
            self._modified = None
        else:
            self._modified = rfc822.mktime_tz(rfc822.parsedate_tz(raw_modified))

        self._mimetype = self._data_response.headers.get(
            'Content-Type', 'application/octet-stream')

        return self._data_response.raw
Ejemplo n.º 13
0
def _parse_midmo_date(datestring):
    """
    returns a local datetime corresponding to 
    the datestring given.
    """
    # these appear to be rfc822/2822, not documented.
    return datetime.fromtimestamp(rfc822.mktime_tz(rfc822.parsedate_tz(datestring)))
Ejemplo n.º 14
0
    def _parse_sibling(self, sibling, headers, data):
        """
        Parses a single sibling out of a response.
        """

        sibling.exists = True

        # Parse the headers...
        for header, value in headers:
            header = header.lower()
            if header == "content-type":
                sibling.content_type, sibling.charset = self._parse_content_type(value)
            elif header == "etag":
                sibling.etag = value
            elif header == "link":
                sibling.links = self._parse_links(value)
            elif header == "last-modified":
                sibling.last_modified = mktime_tz(parsedate_tz(value))
            elif header.startswith("x-riak-meta-"):
                metakey = header.replace("x-riak-meta-", "")
                sibling.usermeta[metakey] = value
            elif header.startswith("x-riak-index-"):
                field = header.replace("x-riak-index-", "")
                reader = csv.reader([value], skipinitialspace=True)
                for line in reader:
                    for token in line:
                        token = decode_index_value(field, token)
                        sibling.add_index(field, token)
            elif header == "x-riak-deleted":
                sibling.exists = False

        sibling.encoded_data = data

        return sibling
Ejemplo n.º 15
0
	def populate(self, sub):
		"""Load the 'index' file of sub-archive sub into self.msgs.

		The index is read two lines at a time; each pair that matches
		_rx_index adds one entry to self.msgs, keyed by message number.
		When the date of an entry cannot be converted to a timestamp,
		the previous entry's timestamp plus one is used instead.
		"""
		index = open(os.path.join(self.archdir, str(sub), 'index'))
		try:
			linepair = index.readline() + index.readline()
			prev_timestamp = 0
			while linepair:
				match = _rx_index.match(linepair.rstrip())
				if match:
					g = match.groups()
					msgnum = int(g[0])
					try:
						timestamp = rfc822.mktime_tz(rfc822.parsedate_tz(g[3]))
					except Exception:
						# Unusable date: keep entries ordered by faking
						# a timestamp just after the previous one.
						timestamp = prev_timestamp + 1
					prev_timestamp = timestamp
					localtime = time.localtime(timestamp)
					self.msgs[msgnum] = {
						MSGNUM: msgnum,
						THREADID: g[1],
						SUBJECT: g[2],
						DATE: g[3],
						TIMESTAMP: timestamp,
						AUTHORID: g[4],
						AUTHOR: g[5],
						# year * 100 + month, e.g. 200107
						MONTH: localtime[0] * 100 + localtime[1],
						}
				linepair = index.readline() + index.readline()
		finally:
			# Close the index even when a malformed entry raises.
			index.close()
Ejemplo n.º 16
0
def _readdate(txt):
    """Interpret the string as a date value."""
    import rfc822
    date = rfc822.parsedate_tz(txt.strip())
    if date is not None:
        return rfc822.mktime_tz(date)
    return None
Ejemplo n.º 17
0
def parse_pubdate(text):
    """Parse a date string into a Unix timestamp

    RFC 822 style dates are tried first, then the first 19 characters are
    tried as 'YYYY-MM-DDTHH:MM:SS' (interpreted as local time).  Empty,
    unparseable or out-of-range input yields 0; the latter two are
    logged as errors.

    >>> parse_pubdate('Fri, 21 Nov 1997 09:55:06 -0600')
    880127706

    >>> parse_pubdate('')
    0

    >>> parse_pubdate('unknown')
    0
    """
    if not text:
        return 0

    parsed = parsedate_tz(text)
    if parsed is not None:
        try:
            return int(mktime_tz(parsed))
        except (OverflowError, ValueError):
            # Syntactically valid dates can still lie outside the range
            # the platform can convert.
            logger.error('Date out of range: %s', repr(text))
            return 0

    # TODO: Fully RFC 3339-compliant parsing (w/ timezone)
    try:
        parsed = time.strptime(text[:19], '%Y-%m-%dT%H:%M:%S')
        return int(time.mktime(parsed))
    except (ValueError, OverflowError):
        pass

    logger.error('Cannot parse date: %s', repr(text))
    return 0
Ejemplo n.º 18
0
 def _spew_message(self, id, msg, flags, uid):
     bits = []
     if uid:
         bits.append('UID %s' % msg.uid)
     for flag in flags:
         if flag == 'FLAGS':
             bits.append('FLAGS (%s)' % ' '.join(msg.flags))
         elif flag == 'INTERNALDATE':
             idate = msg.get_internal_date()
             ttup = rfc822.parsedate_tz(idate)
             odate = time.strftime("%d-%b-%Y %H:%M:%S ", ttup[:9])
             if ttup[9] is None:
                 odate = odate + "+0000"
             else:
                 if ttup[9] >= 0:
                     sign = "+"
                 else:
                     sign = "-"
                 odate = odate + sign + str(((abs(ttup[9]) / 3600) * 100 + (abs(ttup[9]) % 3600) / 60)).zfill(4)
             bits.append('INTERNALDATE ' + _quote(odate))
         elif flag == 'RFC822.SIZE':
             bits.append('RFC822.SIZE %d' % len(msg.body))
         elif flag == 'ENVELOPE':
             bits.append('ENVELOPE ' + collapseNestedLists([getEnvelope(msg.headers)]))
         elif flag == 'BODY.PEEK[]':
             bits.append('BODY[] ' + _literal(msg.body))
         else:
             raise ValueError("Unsupported flag '%s'" % flag)
     self.send_untagged_response("%d FETCH (%s)" % (id, " ".join(bits)))
Ejemplo n.º 19
0
    def _parse_sibling(self, sibling, headers, data):
        """
        Parses a single sibling out of a response.
        """

        sibling.exists = True

        # Parse the headers...
        for header, value in headers:
            header = header.lower()
            if header == 'content-type':
                sibling.content_type, sibling.charset = \
                    self._parse_content_type(value)
            elif header == 'etag':
                sibling.etag = value
            elif header == 'link':
                sibling.links = self._parse_links(value)
            elif header == 'last-modified':
                sibling.last_modified = mktime_tz(parsedate_tz(value))
            elif header.startswith('x-riak-meta-'):
                metakey = header.replace('x-riak-meta-', '')
                sibling.usermeta[metakey] = value
            elif header.startswith('x-riak-index-'):
                field = header.replace('x-riak-index-', '')
                reader = csv.reader([value], skipinitialspace=True)
                for line in reader:
                    for token in line:
                        token = decode_index_value(field, token)
                        sibling.add_index(field, token)
            elif header == 'x-riak-deleted':
                sibling.exists = False

        sibling.encoded_data = data

        return sibling
Ejemplo n.º 20
0
def process_reply_file(current, fname):
    """Append the mail message stored in fname as a note on current.

    The note is a dict with 'text' (the From header, a newline, then the
    message body) and 'timestamp' (the Date header as parsed by
    ``rfc822.parsedate_tz``, i.e. a 10-tuple or None).  It is appended to
    ``current["notes"]``.
    """
    reply = open(fname, "r")
    try:
        msg = rfc822.Message(reply)
        new_note = {}
        # msg.fp is positioned just after the headers, so read() yields
        # the body only.
        new_note['text'] = "%s\n%s" % (msg['From'], msg.fp.read())
        new_note['timestamp'] = rfc822.parsedate_tz(msg['Date'])
    finally:
        # The file was previously left open.
        reply.close()
    current["notes"].append(new_note)
Ejemplo n.º 21
0
    def execute(self, observation):
        """Store observation in DynamoDB unless an item with its key exists.

        Adds 'ObservationTime' (seconds since the epoch, UTC), 'StationId'
        and 'CompositeKey' ("<station_id>_<ObservationTime>") to the
        observation dict, then looks the key up in the
        'VocalPelicanObservation' table.

        Returns the result of ``put_item`` when the item was not found,
        and False when it already exists or the lookup failed with a
        JSONResponseError.  Raises KeyError when one of the AWS_*
        environment variables is missing.
        """
        station_id = observation['station_id']

        raw_time = observation['observation_time_rfc822']
        # mktime_tz already returns seconds since the epoch in UTC.  The
        # previous code converted it to a *local* datetime and subtracted
        # a *UTC* epoch, which shifted the value by the host's UTC offset.
        delta = int(rfc822.mktime_tz(rfc822.parsedate_tz(raw_time)))

        observation['ObservationTime'] = delta
        observation['StationId'] = station_id

        composite_key = "%s_%d" % (station_id, delta)
        observation['CompositeKey'] = composite_key

        region = os.environ['AWS_DEFAULT_REGION']
        accessKey = os.environ['AWS_ACCESS_KEY']
        secretKey = os.environ['AWS_SECRET_KEY']

        try:
            connx = boto.dynamodb2.connect_to_region(region, aws_access_key_id=accessKey, aws_secret_access_key=secretKey)
            obs_table = Table('VocalPelicanObservation', connection = connx)
            # Only the existence check matters; the row itself is unused.
            obs_table.get_item(CompositeKey=composite_key)
        except JSONResponseError as responseError:
            # authentication problem
            print(responseError)
        except boto.dynamodb2.exceptions.ItemNotFound:
            # not found implies safe to add
            return obs_table.put_item(observation)

        return False
Ejemplo n.º 22
0
    def getReceiveTime(self, message):
        """Return the message's timestamp as seconds since the epoch (UTC).

        The 'Date' header is used, falling back to 'Sent'.  When the value
        cannot be parsed the current time is returned instead.
        """
        # This is tricky...  the parsed date comes with an offset value
        # that represents the number of seconds of difference between the
        # parsed timezone and UTC.  The events database wants all time
        # as seconds since the epoch and treats it as UTC.  As a
        # result we have to use the datetime class to do the
        # conversion because the functions in the time module do all
        # kinds of conversions "to be helpful"
        timestamp = message.get('Date', message.get('Sent'))
        t = rfc822.parsedate_tz(timestamp)
        if t is None:
            log.warn("Unable to process timestamp '%s' -- defaulting to now",
                     timestamp)
            return time.time()

        offset_secs = t[-1]
        if offset_secs is None:
            # NOTE(review): with tz=None the datetime below is naive and
            # utctimetuple() returns its fields unchanged -- confirm that
            # this matches the "local timezone" wording of the message.
            log.warn("Timezone not specified in '%s' -- defaulting to local timezone",
                     timestamp)
            tz = None
        else:
            # FixedOffset is given the offset in minutes, not seconds.
            tz = FixedOffset(offset_secs / 60, "Unknown")

        # Construct dt using the date and time as well as the timezone
        year, month, day, hour, minute, second = t[0], t[1], t[2], t[3], t[4], t[5]
        dt = datetime(year, month, day, hour, minute, second, 0, tz)
        secs = calendar.timegm(dt.utctimetuple())
        log.debug('Timestamp of the event (should be in UTC): %s -> %f',
                  timestamp, secs)
        return secs
Ejemplo n.º 23
0
def check_last_modified(url):
    """Return the Last-Modified header of url as a naive datetime.

    The date fields are taken as sent by the server; the time zone offset
    in the header is not applied.  Raises IndexError when the response
    has no Last-Modified header.
    """
    u = urlopen(url)
    try:
        last_modified = u.info().getheaders("Last-Modified")[0]
    finally:
        # The connection was previously never closed.
        u.close()
    # Only the first six fields (year .. second) are date components;
    # the seventh slot of the parsed tuple is not microseconds and must
    # not be passed on to datetime().
    return datetime(*parsedate_tz(last_modified)[:6])
Ejemplo n.º 24
0
def fix_date(string):
	"""Normalize an RFC 822 style date to "YYYY-MM-DD HH:MM:SS +HHMM".

	Known misspellings (currently "Sab" for "Sat") are corrected before
	parsing.  The zone is rendered as "+0000" when it is missing, zero,
	or more than 12 hours from UTC.

	Returns None when the string cannot be parsed at all; raises
	FixDateError when the parsed fields cannot be formatted.
	"""
	replacements = (
			("Sab", "Sat"),
			)
	dateformat = "%Y-%m-%d %H:%M:%S"

	# first, fix up some common mistakes
	for wrong, right in replacements:
		string = string.replace(wrong, right)

	# try normal date parsing
	date = rfc822.parsedate_tz(string)

	# if it succeeded...
	if date:
		offset = date[9]
		# check that the time zone is present and sensible
		if not offset or abs(offset) > 12*60*60:
			# use UTC if not
			dateformat += " +0000"
		else:
			# time zone is sensible, use it.  Work on the absolute
			# value so that the minutes survive and negative offsets
			# are not rounded away from zero (+0530 used to come out
			# as +0500, -0330 as -0400).
			if offset < 0:
				sign = "-"
			else:
				sign = "+"
			hours, remainder = divmod(abs(offset), 3600)
			dateformat += " %s%02d%02d" % (sign, hours, remainder // 60)
		# return properly formatted date string; if formatting is not
		# possible, fail
		try:
			return time.strftime(dateformat, date[:-1])
		except ValueError as e:
			raise FixDateError(e)
Ejemplo n.º 25
0
	def loadfrommessage(self, msg):
		"""Populate this object's fields from the parsed mail message msg.

		Sets tofield, fromfield, realfromfield, ccfield, subjectfield,
		annotation, readstatus, date, headers and body.  Missing Cc,
		Subject and annotation headers become (), "" and "".  The date is
		taken from the text after the first ';' in the Received headers
		and is 0 when there is none or it cannot be parsed.
		"""
		self.tofield = msg.getaddrlist("To")
		sender = msg.getaddr("From")
		self.fromfield = sender[1]
		self.realfromfield = sender[0]
		if not self.realfromfield:
			# No display name: fall back to the bare address.
			self.realfromfield = self.fromfield
		self.ccfield = msg.getaddrlist("Cc")
		if not self.ccfield:
			self.ccfield = ()
		self.subjectfield = msg.getheader("Subject")
		if not self.subjectfield:
			self.subjectfield = ""
		self.annotation = msg.getheader("X-SQmaiL-Annotation")
		if not self.annotation:
			self.annotation = ""
		self.readstatus = "Unread"

		# Work out the date the message arrived.

		received = "".join(msg.getallmatchingheaders("Received"))
		self.date = 0
		p = received.find(";")
		if p != -1:
			parsed = rfc822.parsedate_tz(received[p+1:])
			# An unparseable date is treated like a missing one instead
			# of crashing in mktime_tz.
			if parsed is not None:
				self.date = rfc822.mktime_tz(parsed)

		self.headers = "".join(msg.headers)
		self.body = msg.fp.read()
Ejemplo n.º 26
0
def was_modified_since(header=None, mtime=0, size=0):
    """
    Was something modified since the user last downloaded it?

    Returns False only when the header is well formed, its optional
    length matches ``size`` and ``mtime`` is not newer than the date in
    the header.  In every other case, including a missing, malformed or
    out-of-range header, returns True.

    header
      This is the value of the If-Modified-Since header.  If this is None,
      I'll just return True.

    mtime
      This is the modification time of the item we're talking about.

    size
      This is the size of the item we're talking about.
    """
    try:
        if header is None:
            raise ValueError
        matches = re.match(r"^([^;]+)(; length=([0-9]+))?$", header,
                           re.IGNORECASE)
        # matches is None for a header that does not fit the pattern; the
        # resulting AttributeError is handled below.
        parsed_date = rfc822.parsedate_tz(matches.group(1))
        if parsed_date is None:
            # Unparseable date: mktime_tz(None) would raise a TypeError
            # that nothing below catches.
            raise ValueError
        header_mtime = rfc822.mktime_tz(parsed_date)
        header_len = matches.group(3)
        if header_len and int(header_len) != size:
            raise ValueError
        if mtime > header_mtime:
            raise ValueError
    except (AttributeError, ValueError, OverflowError):
        return True
    return False
Ejemplo n.º 27
0
    def get_contents_to_filename(self, filename, headers=None,
                                 cb=None, num_cb=10,
                                 torrent=False,
                                 version_id=None,
                                 res_download_handler=None,
                                 response_headers=None):
        """
        Retrieve an object from S3 using the name of the Key object as the
        key in S3.  Store contents of the object to a file named by 'filename'.
        See get_contents_to_file method for details about the
        parameters.

        The file is always closed, even when the download raises.  After a
        successful download the file's access and modification times are
        set from ``self.last_modified`` on a best-effort basis.

        :type filename: string
        :param filename: The filename of where to put the file contents

        :type headers: dict
        :param headers: Any additional headers to send in the request

        :type cb: function
        :param cb: a callback function that will be called to report
                   progress on the download.  The callback should accept
                   two integer parameters, the first representing the
                   number of bytes that have been successfully
                   transmitted and the second representing the
                   size of the object being transmitted.

        :type num_cb: int
        :param num_cb: (optional) If a callback is specified with
                       the cb parameter this parameter determines the
                       granularity of the callback by defining
                       the maximum number of times the callback will
                       be called during the file transfer.

        :type torrent: bool
        :param torrent: If True, returns the contents of a torrent file
                        as a string.

        :type version_id: string
        :param version_id: Passed through to get_contents_to_file.

        :type res_download_handler: ResumableDownloadHandler
        :param res_download_handler: If provided, this handler will
                                     perform the download.

        :type response_headers: dict
        :param response_headers: A dictionary containing HTTP headers/values
                                 that will override any headers associated with
                                 the stored object in the response.
                                 See http://goo.gl/EWOPb for details.
        """
        fp = open(filename, 'wb')
        try:
            self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent,
                                      version_id=version_id,
                                      res_download_handler=res_download_handler,
                                      response_headers=response_headers)
        finally:
            # Do not leak the handle when the download fails.
            fp.close()
        # if last_modified date was sent from s3, try to set file's timestamp
        if self.last_modified is not None:
            try:
                modified_tuple = rfc822.parsedate_tz(self.last_modified)
                modified_stamp = int(rfc822.mktime_tz(modified_tuple))
                os.utime(fp.name, (modified_stamp, modified_stamp))
            except Exception:
                # Deliberately best-effort: a bad date or a failing utime
                # must not turn a completed download into an error.
                pass
Ejemplo n.º 28
0
def _readdate(txt):
    """Interpret the string as a date value."""
    import rfc822
    date = rfc822.parsedate_tz(txt.strip())
    if date is not None:
        return rfc822.mktime_tz(date)
    return None
Ejemplo n.º 29
0
Archivo: hg.py Proyecto: davej/changes
    def log(self, parent=None, limit=100):
        """Yield a RevisionResult for each entry of the repository log.

        ``parent`` (if given) is passed as the ``-r`` argument and
        ``limit`` (if non-zero) as ``--limit``.  Entries without a branch
        are reported on 'default'; empty and all-zero parent ids are
        dropped.  The author date is converted to a naive UTC datetime.
        """
        # TODO(dcramer): we should make this streaming
        cmd = ['log', '--template=%s' % (LOG_FORMAT,)]
        if parent:
            cmd.append('-r %s' % (parent,))
        if limit:
            cmd.append('--limit=%d' % (limit,))
        output = self.run(cmd)

        null_id = '0' * 40
        # Records are separated by \x02, fields within a record by \x01.
        for record in BufferParser(output, '\x02'):
            (sha, author, raw_date, raw_parents,
             raw_branches, message) = record.split('\x01')

            branch_names = [b for b in raw_branches.split(' ') if b] or ['default']
            parent_ids = [p for p in raw_parents.split(' ')
                          if p and p != null_id]

            epoch_seconds = mktime_tz(parsedate_tz(raw_date))

            yield RevisionResult(
                id=sha,
                author=author,
                author_date=datetime.utcfromtimestamp(epoch_seconds),
                message=message,
                parents=parent_ids,
                branches=branch_names,
            )
Ejemplo n.º 30
0
def _parse_date_rfc822(dateString):
    '''Parse an RFC822, RFC1123, RFC2822, or asctime-style date'''
    data = dateString.split()
    if not data:
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in rfc822._daynames:
        del data[0]
    if len(data) == 4:
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            data.append('')
        dateString = " ".join(data)
    # Account for the Etc/GMT timezone by stripping 'Etc/'
    elif len(data) == 5 and data[4].lower().startswith('etc/'):
        data[4] = data[4][4:]
        dateString = " ".join(data)
    if len(data) < 5:
        dateString += ' 00:00:00 GMT'
    tm = rfc822.parsedate_tz(dateString)
    if tm:
        # Jython doesn't adjust for 2-digit years like CPython does,
        # so account for it by shifting the year so that it's in the
        # range 1970-2069 (1970 being the year of the Unix epoch).
        if tm[0] < 100:
            tm = (tm[0] + (1900, 2000)[tm[0] < 70],) + tm[1:]
        return time.gmtime(rfc822.mktime_tz(tm))
Ejemplo n.º 31
0
    def getReceiveTime(self, message):
        """Return the message's timestamp as seconds since the epoch (UTC).

        The 'Date' header is used, falling back to 'Sent'.  When the value
        cannot be parsed the current time is returned instead.
        """
        # This is tricky...  the parsed date comes with an offset value
        # that represents the number of seconds of difference between the
        # parsed timezone and UTC.  The events database wants all time
        # as seconds since the epoch and treats it as UTC.  As a
        # result we have to use the datetime class to do the
        # conversion because the functions in the time module do all
        # kinds of conversions "to be helpful"
        timestamp = message.get('Date', message.get('Sent'))
        t = rfc822.parsedate_tz(timestamp)
        if t is None:
            log.warn("Unable to process timestamp '%s' -- defaulting to now",
                     timestamp)
            return time.time()

        offset_secs = t[-1]
        if offset_secs is None:
            # NOTE(review): with tz=None the datetime below is naive and
            # utctimetuple() returns its fields unchanged -- confirm that
            # this matches the "local timezone" wording of the message.
            log.warn("Timezone not specified in '%s' -- defaulting to local timezone",
                     timestamp)
            tz = None
        else:
            # FixedOffset is given the offset in minutes, not seconds.
            tz = FixedOffset(offset_secs / 60, "Unknown")

        # Construct dt using the date and time as well as the timezone
        year, month, day, hour, minute, second = t[0], t[1], t[2], t[3], t[4], t[5]
        dt = datetime(year, month, day, hour, minute, second, 0, tz)
        secs = calendar.timegm(dt.utctimetuple())
        log.debug('Timestamp of the event (should be in UTC): %s -> %f',
                  timestamp, secs)
        return secs
Ejemplo n.º 32
0
def parse_pubdate(text):
    """Parse a date string into a Unix timestamp

    RFC 822 style dates are tried first.  Otherwise the text is read as
    'YYYY-MM-DDTHH:MM:SS', optionally followed by 'Z' or a '+HH:MM' /
    '-HH:MM' UTC offset.  Without a recognised offset the timestamp is
    interpreted as local time.  Empty or unparsable input yields 0.

    >>> parse_pubdate('Fri, 21 Nov 1997 09:55:06 -0600')
    880127706

    >>> parse_pubdate('2003-12-13T00:00:00+02:00')
    1071266400

    >>> parse_pubdate('2003-12-13T18:30:02Z')
    1071340202

    >>> parse_pubdate('')
    0

    >>> parse_pubdate('unknown')
    0
    """
    if not text:
        return 0

    parsed = parsedate_tz(text)
    if parsed is not None:
        try:
            return int(mktime_tz(parsed))
        except (OverflowError, ValueError):
            # The date is outside the range the platform can represent.
            logger.error('Cannot parse date: %s', repr(text))
            return 0

    try:
        parsed = time.strptime(text[:19], '%Y-%m-%dT%H:%M:%S')

        # Whatever follows the seconds may be a UTC designator or offset.
        suffix = text[19:]
        offset = None
        if suffix == 'Z':
            offset = 0
        elif (len(suffix) == 6 and suffix[0] in '+-' and suffix[3] == ':'
                and suffix[1:3].isdigit() and suffix[4:6].isdigit()):
            offset = 3600 * int(suffix[1:3]) + 60 * int(suffix[4:6])
            if suffix[0] == '-':
                offset = -offset

        if offset is None:
            # No usable timezone information: fall back to local time.
            return int(time.mktime(parsed))
        # mktime_tz() expects the nine time fields followed by the offset.
        return int(mktime_tz(tuple(parsed) + (offset,)))
    except (OverflowError, ValueError):
        pass

    logger.error('Cannot parse date: %s', repr(text))
    return 0
Ejemplo n.º 33
0
    def open(self):
        """Start the 'data' request and return the raw response stream.

        As side effects this stores the response in ``self._data_response``
        and records ``self._size`` (from Content-Length, or None),
        ``self._modified`` (epoch seconds from Last-Modified, or None when
        the header is absent or cannot be parsed) and ``self._mimetype``
        (from Content-Type, defaulting to 'application/octet-stream').
        """
        # XXX in future add support for compression
        headers = {'Accept-Encoding': ''}
        if _requests_version == '0':
            self._data_response = self._session.get(self._url('data'),
                                                    prefetch=False,
                                                    headers=headers)
        else:
            self._data_response = self._session.get(self._url('data'),
                                                    stream=True,
                                                    headers=headers)
        self._validate_response(self._data_response)

        size = self._data_response.headers.get('Content-Length', None)
        if size is not None:
            size = int(size)
        self._size = size

        modified = self._data_response.headers.get('Last-Modified', None)
        if modified is not None:
            # parsedate_tz() returns None for a malformed header; passing
            # that on to mktime_tz() would raise TypeError.
            parsed = rfc822.parsedate_tz(modified)
            if parsed is None:
                modified = None
            else:
                modified = rfc822.mktime_tz(parsed)
        self._modified = modified

        mimetype = self._data_response.headers.get('Content-Type',
                                                   'application/octet-stream')
        self._mimetype = mimetype

        return self._data_response.raw
Ejemplo n.º 34
0
Archivo: jb2bz.py Proyecto: EQ4/h5vcc
def process_reply_file(current, fname):
    """Read the mail message in ``fname`` and append it to ``current["notes"]``.

    The appended note is a dict with:
      "text"      - the From header, a newline, then the message body
      "timestamp" - the Date header as parsed by rfc822.parsedate_tz()
    """
    new_note = {}
    reply = open(fname, "r")
    try:
        msg = rfc822.Message(reply)
        new_note["text"] = "%s\n%s" % (msg["From"], msg.fp.read())
        new_note["timestamp"] = rfc822.parsedate_tz(msg["Date"])
    finally:
        # Always release the file handle, even if the message is malformed.
        reply.close()
    current["notes"].append(new_note)
Ejemplo n.º 35
0
def was_modified_since(header=None, mtime=0, size=0):
    """
    Was something modified since the user last downloaded it?

    header
      This is the value of the If-Modified-Since header.  If this is None,
      I'll just return True.

    mtime
      This is the modification time of the item we're talking about.

    size
      This is the size of the item we're talking about.

    A header that does not match the expected shape, or whose date cannot
    be parsed or converted, also yields True.
    """
    try:
        if header is None:
            raise ValueError
        matches = re.match(r"^([^;]+)(; length=([0-9]+))?$", header,
                           re.IGNORECASE)
        # matches is None for a non-matching header; the resulting
        # AttributeError is handled below.
        parsed = rfc822.parsedate_tz(matches.group(1))
        if parsed is None:
            # Unparsable date: treat as modified rather than letting
            # mktime_tz(None) raise TypeError.
            raise ValueError
        header_mtime = rfc822.mktime_tz(parsed)
        header_len = matches.group(3)
        if header_len and int(header_len) != size:
            raise ValueError
        if mtime > header_mtime:
            raise ValueError
    except (AttributeError, ValueError, OverflowError):
        return True
    return False
Ejemplo n.º 36
0
 def parse(self, *args, **kwargs):
     """ return the time value (in seconds since 1970)

     The header value is obtained by calling ``self`` with the given
     arguments.  A false value yields None.  A value that cannot be
     parsed or converted raises HTTPBadRequest.
     """
     value = self.__call__(*args, **kwargs)
     if value:
         try:
             return mktime_tz(parsedate_tz(value))
         except (OverflowError, TypeError, ValueError):
             # TypeError: parsedate_tz() returned None (unparsable text);
             # OverflowError/ValueError: date outside the convertible range.
             raise HTTPBadRequest(("Received an ill-formed timestamp for %s: %s\r\n") % (self.name, value))
Ejemplo n.º 37
0
  def convert_rfc822(self, date_string):
    """ Turn an RFC 822 date string into a datetime object.

    The string is parsed together with its timezone offset, converted to
    epoch seconds, and passed to ``datetime.datetime.fromtimestamp``.
    """
    parsed_fields = rfc822.parsedate_tz(date_string)
    epoch_seconds = rfc822.mktime_tz(parsed_fields)
    return datetime.datetime.fromtimestamp(epoch_seconds)
Ejemplo n.º 38
0
def twitter():
    """Yield the entries of the 'results' list fetched from TWITTER_URL.

    Each entry gets two extra keys before being yielded:
      'timestamp' - epoch seconds derived from its 'created_at' string,
                    honouring the UTC offset contained in that string
      'template'  - 'twitter/tweet.html'

    Nothing is yielded when the HTTP response is not OK.
    """
    response = requests.get(TWITTER_URL)
    if not response.ok:
        # A plain return ends the generator; raising StopIteration inside a
        # generator body becomes a RuntimeError under PEP 479.
        return
    for obj in json.loads(response.content)['results']:
        # mktime_tz() applies the parsed offset, whereas time.mktime() on the
        # truncated tuple would read the fields as local time.
        obj['timestamp'] = rfc822.mktime_tz(rfc822.parsedate_tz(obj['created_at']))
        obj['template'] = 'twitter/tweet.html'
        yield obj
Ejemplo n.º 39
0
 def _normalize_rfc822_date( self, date_string ):
     return  datetime.fromtimestamp(
                 rfc822.mktime_tz(
                     rfc822.parsedate_tz(
                         date_string
                     )
                 )
             )
Ejemplo n.º 40
0
def numericTime(s):
    """Return local-time epoch seconds for the date at the start of ``s``.

    Only the first 20 characters of ``s`` are parsed; any timezone
    information in the parse result is dropped before ``time.mktime``.
    """
    # locale is not supported under windows, and since the input is english
    # anyway no locale switching is attempted here.
    parsed_fields = rfc822.parsedate_tz(s[:20])
    return time.mktime(parsed_fields[:9])
Ejemplo n.º 41
0
def numericTime(s):
    """Convert the leading 20 characters of ``s`` to a local-time timestamp.

    The prefix is parsed with ``rfc822.parsedate_tz`` and the first nine
    fields of the result are passed to ``time.mktime``.
    """
    # No locale handling: it is not supported under windows and the dates
    # are english anyway.
    head = s[:20]
    date_fields = rfc822.parsedate_tz(head)
    seconds = time.mktime(date_fields[:9])
    return seconds
Ejemplo n.º 42
0
    def addMail(self, mailString):
        """ Store mail as news item
            Returns created item

            The item title is the decoded mail subject, its description
            the first 60 characters of the body, and its text an HTML
            fragment containing today's date, the sender and the body.
            Publishing the new item is attempted on a best-effort basis.
        """
        from datetime import date

        pw = self.context.portal_workflow

        (header, body) = splitMail(mailString)

        # if 'keepdate' is set, get date from mail,
        # ... otherwise take our own date, clients are always lying!
        # NOTE(review): mail_time is computed but not used further down in
        # this method -- confirm whether it was meant to be set on the item.
        mail_time = None
        if self.getValueFor('keepdate'):
            timetuple = rfc822.parsedate_tz(header.get('date'))
            # A missing or unparsable Date header yields None; fall back to
            # the current time instead of failing in mktime_tz().
            if timetuple is not None:
                mail_time = DateTime(rfc822.mktime_tz(timetuple))
        if mail_time is None:
            mail_time = DateTime()

        (TextBody, ContentType, HtmlBody, Attachments) = unpackMail(mailString)

        # Test: date shown in front of the sender in the item text
        today = date.today()
        mydate = today.strftime("%d.%m.%Y")

        # let's create the news item

        subject = mime_decode_header(header.get('subject', 'No Subject'))
        sender = mime_decode_header(header.get('from','No From'))
        #title = "%s / %s" % (subject, sender)
        title = "%s"  % (subject)

        new_id = IUserPreferredURLNormalizer(self.request).normalize(title)
        item_id = self._findUniqueId(new_id)
        # ContentType is only set for the TextBody
        if ContentType:
            body = TextBody
        else:
            body = self.HtmlToText(HtmlBody)

        # as a provisional solution
        desc = "%s..." % (body[:60])
        uni_aktuell_body = "<p><strong>%s: %s</strong></p> <p>&nbsp;</p><pre>%s</pre>" % (mydate, sender, body)
        # uni_aktuell_body = '<p>&nbsp;</p>' + body

        objid = self.context.invokeFactory(NewsItem.meta_type, id=item_id, title=title, text=uni_aktuell_body, description=desc)

        mailObject = getattr(self.context, objid)
        try:
            # original: pw.doActionFor(mailObject, 'hide')
            pw.doActionFor(mailObject, 'publish')
        except Exception:
            # Best effort: the item is still returned if the transition is
            # unavailable.  KeyboardInterrupt/SystemExit are not swallowed.
            pass
        return mailObject
Ejemplo n.º 43
0
    def log(self, parent=None, branch=None, author=None, offset=0, limit=100, paths=None):
        """ Gets the commit log for the repository.

        Each revision returned has exactly one branch name associated with it.
        This is the branch name encoded into the revision changeset description.

        This is a generator: it runs a 'log' command built from the given
        filters, skips the first ``offset`` records of the output and
        yields one RevisionResult per remaining record.

        See documentation for the base for general information on this function.
        """
        start_time = time()

        # TODO(dcramer): we should make this streaming
        cmd = ['log', '--template=%s' % (LOG_FORMAT,)]

        if parent and branch:
            raise ValueError('Both parent and branch cannot be set')

        if branch:
            cmd.append('-b{0}'.format(branch))

        # Build the -r parameter value from parent and author
        revset = ('ancestors(%s)' % parent) if parent else None
        if author:
            if revset:
                revset = '({r}) and author("{0}")'.format(author, r=revset)
            else:
                revset = 'author("{0}")'.format(author)
        if revset:
            cmd.append('-r reverse({0})'.format(revset))

        if limit:
            # Skipped records still count towards the limit given to the
            # command, hence offset + limit.
            cmd.append('--limit=%d' % (offset + limit,))

        if paths:
            cmd.extend(["glob:" + path.strip() for path in paths])

        result = self.run(cmd)

        self.log_timing('log', start_time)

        null_sha = '0' * 40
        for idx, chunk in enumerate(BufferParser(result, '\x02')):
            if idx < offset:
                continue

            (sha, commit_author, raw_date, raw_parents, raw_branches,
             message) = chunk.split('\x01')

            # Drop empty tokens; fall back to 'default' when no branch is left.
            branch_names = [name for name in raw_branches.split(' ') if name]
            if not branch_names:
                branch_names = ['default']
            # Drop empty tokens and the all-zero placeholder hash.
            parent_shas = [node for node in raw_parents.split(' ')
                           if node and node != null_sha]

            committed_at = datetime.utcfromtimestamp(
                mktime_tz(parsedate_tz(raw_date)))

            yield RevisionResult(
                id=sha,
                author=commit_author,
                author_date=committed_at,
                message=message,
                parents=parent_shas,
                branches=branch_names,
            )
Ejemplo n.º 44
0
    def _item_publish_date(self, item):
        """Returns the UTC date that the comic strip was published

        The RFC822 string in ``item['pubDate']`` is converted to epoch
        seconds and then to a datetime using ``pytz.utc``.

        """
        epoch_seconds = rfc822.mktime_tz(rfc822.parsedate_tz(item['pubDate']))
        return datetime.fromtimestamp(epoch_seconds, pytz.utc)
Ejemplo n.º 45
0
def improve_date(input):
    """Prefix a date string with its local-time rendering where useful.

    The string is returned unchanged when its last space-separated token
    (without its first and last character) is one of ``time.tzname``, when
    that token equals ``get_zone()``, or when no non-zero UTC offset can
    be parsed from it.  Otherwise the result is
    "<date formatted in local time> (<original string>)".
    """
    zone_token = input.split(" ")[-1].strip()
    if zone_token[1:-1] in time.tzname or zone_token == get_zone():
        return input

    fields = rfc822.parsedate_tz(input)
    if not fields or not fields[9]:
        return input

    # mktime() reads the fields as local time; removing the parsed offset
    # and the local zone offset gives the epoch value passed to formatdate().
    epoch_seconds = time.mktime(fields[:9])-fields[9]-(time.timezone)
    return "%s (%s)" % (formatdate(epoch_seconds, True), input)
Ejemplo n.º 46
0
 def _onsuccess(boto_key):
     checksum = boto_key.etag.strip('"')
     last_modified = boto_key.last_modified
     modified_tuple = rfc822.parsedate_tz(last_modified)
     modified_stamp = int(rfc822.mktime_tz(modified_tuple))
     return {'checksum': checksum,
             'last_modified': modified_stamp,
             'width': boto_key.metadata.width,
             'height': boto_key.metadata.height}
Ejemplo n.º 47
0
    def _item_publish_date_tz(self, item):
        """Returns the date that the comic strip was published.

        The original timezone is preserved.

        """
        parts = rfc822.parsedate_tz(item['pubDate'])
        timestamp = rfc822.mktime_tz(parts)
        return datetime.fromtimestamp(timestamp)
Ejemplo n.º 48
0
def parse_pubdate(text):
    """Parse a date string into a Unix timestamp

    RFC 822 style dates are tried first, then 'YYYY-MM-DDTHH:MM:SS' with
    an optional 'Z' or '+HH:MM' / '-HH:MM' suffix.  Returns 0 when the
    text is empty, cannot be parsed, or cannot be converted.

    >>> parse_pubdate('Fri, 21 Nov 1997 09:55:06 -0600')
    880127706

    >>> parse_pubdate('2003-12-13T00:00:00+02:00')
    1071266400

    >>> parse_pubdate('2003-12-13T18:30:02Z')
    1071340202

    >>> parse_pubdate('Mon, 02 May 1960 09:05:01 +0100')
    -305049299

    >>> parse_pubdate('')
    0

    >>> parse_pubdate('unknown')
    0
    """
    if not text:
        return 0

    rfc_fields = parsedate_tz(text)
    if rfc_fields is not None:
        try:
            return int(mktime_tz(rfc_fields))
        except (OverflowError, ValueError):
            logger.warning(
                'bad pubdate %s is before epoch or after end of time (2038)',
                rfc_fields)
            return 0

    try:
        iso_fields = time.strptime(text[:19], '%Y-%m-%dT%H:%M:%S')
        if iso_fields is not None:
            zone = re.match(r'^(?:Z|([+-])([0-9]{2})[:]([0-9]{2}))$', text[19:])
            if not zone:
                # No recognisable zone suffix: interpret as local time.
                return int(time.mktime(iso_fields))

            sign, hours, minutes = zone.groups()
            if sign:
                offset = 3600 * int(hours) + 60 * int(minutes)
                if sign == '-':
                    offset = -offset
            else:
                # The suffix was a bare 'Z'.
                offset = 0
            # mktime_tz() takes the nine time fields followed by the offset.
            return int(mktime_tz(tuple(iso_fields) + (offset,)))
    except Exception:
        pass

    logger.error('Cannot parse date: %s', repr(text))
    return 0
def convert_created_at(line, created_at_format):
    """ fix the created_at time since it is 'RFC 2822'

    ``line['created_at']`` is replaced by its date/time fields formatted
    with ``created_at_format``, and the parsed timezone offset is stored
    in ``line['created_at_shift']``.  The same ``line`` dict is returned.
    """
    fields = parsedate_tz(line['created_at'])
    # The last field is the timezone offset; the nine before it are the
    # time fields.  mktime() needs them as a tuple (Python 3 rejects a list).
    tz = fields[-1]
    dt = datetime.datetime.fromtimestamp(mktime(tuple(fields[:-1])))
    line['created_at'] = dt.strftime(created_at_format)
    line['created_at_shift'] = tz
    return line
Ejemplo n.º 50
0
def DateHeader_parse(self, *args, **kwargs):
    """ return the time value (in seconds since 1970)

    The header value is obtained by calling ``self`` with the given
    arguments; a false value yields None.  A value that cannot be parsed
    or converted raises HTTPBadRequest.
    """
    value = self.__call__(*args, **kwargs)
    if not value:
        return None
    try:
        seconds = mktime_tz(parsedate_tz(value))
    except (OverflowError, TypeError):
        raise HTTPBadRequest(
            ("Received an ill-formed timestamp for %s: %s\r\n") %
            (self.name, value))
    return seconds
Ejemplo n.º 51
0
def mirror_refdata(refdataTopdir='https://kbase.us/refdata/', refdataDiskdir='refdata'):
    """Mirror every module/version directory listed under ``refdataTopdir``.

    The JSON listing at ``refdataTopdir`` names the modules, and each
    module URL lists its versions.  For every version:

    * it is skipped when the remote __READY__ file's Last-Modified time is
      older than the modification time of the local __READY__ file;
    * otherwise the version directory, plus any directories named in the
      remote '.dotfiles' listing, are fetched with retrieve_dir() into
      ``refdataDiskdir/<module>/<version>``;
    * finally the __READY__ file itself is downloaded.
    """
    refdataReq = requests.get(refdataTopdir)
    modules = refdataReq.json()

    for module in modules:
        moduledir = refdataTopdir + module['name']
        moduleReq = requests.get(moduledir)
        versions = moduleReq.json()
        for version in versions:
            versiondir = moduledir + '/' + version['name']
            versionDiskPath = refdataDiskdir + '/' + module['name'] + '/' + version['name']

            # for retrieving dot files (requires building .dotfile manually at the source)
            remotedotfilesls = []
            remotedotfilesreq = requests.get(versiondir + '/.dotfiles')
            # for now just get the .dotfiles file
            try:
                remotedotfilesls = remotedotfilesreq.json()
            except ValueError:
                # No usable JSON listing: carry on without dot files.
                pass

            readyHeadReq = requests.head(versiondir + '/__READY__')
            print(readyHeadReq.headers['Last-Modified'])
            mirrorDatestamp = rfc822.mktime_tz(rfc822.parsedate_tz(readyHeadReq.headers['Last-Modified']))
            readyFile = versionDiskPath + '/__READY__'
            if os.path.isfile(readyFile):
                fileDatestamp = os.path.getmtime(readyFile)
                print(mirrorDatestamp)
                print(fileDatestamp)
                if mirrorDatestamp < fileDatestamp:
                    print("mirror __READY__ older than local file, skipping " + versiondir)
                    continue
            try:
                os.makedirs(versionDiskPath)
                print('created dir ' + versionDiskPath)
            except OSError:
                # An already existing directory is fine; anything else is not.
                if not os.path.isdir(versionDiskPath):
                    raise
            retrieve_dir(versiondir, versionDiskPath)

            # hacky support for dot files
            for dotfile in remotedotfilesls:
                # for now only support for dot dirs
                # (need to break out the file retrieval into a separate method to do files here)
                retrieve_dir(versiondir + '/' + dotfile['name'], versionDiskPath + '/' + dotfile['name'])

            # if this works, retrieve __READY__ file
            print('retrieve ' + versiondir + ' succeeded, retrieving __READY__ file')
            filereq = requests.get(versiondir + '/__READY__', timeout=5, stream=True)
            with open(versionDiskPath + '/__READY__', 'wb') as fd:
                for chunk in filereq.iter_content(1024):
                    fd.write(chunk)
Ejemplo n.º 52
0
 def __init__(self, id, spamHeader, dateHeader, headers):
     """Set up the message record.

     ``score`` and ``required`` come from scoreFromHeader(spamHeader); a
     warning is issued when no score was found.  ``date`` is only set
     when ``dateHeader`` is non-empty, as the datetime for that header.
     ``data`` starts as an empty dict and ``flags`` as None.
     """
     self.id = id
     self.headers = headers
     self.data = {}
     self.flags = None

     self.score, self.required = scoreFromHeader(spamHeader)
     if self.score is None:
         warn('no score on msg id=%s header="%s"' % (self.id, spamHeader))

     if dateHeader:
         epoch_seconds = rfc822.mktime_tz(rfc822.parsedate_tz(dateHeader))
         self.date = datetime.fromtimestamp(epoch_seconds)
def s3_has_uptodate_file(bucket, transfer_file, s3_key_name):
    """Check if S3 has an existing, up to date version of this file.

    True only when the bucket has a key named ``s3_key_name`` whose size
    equals the size of ``transfer_file`` and whose last-modified time is
    not older than the local file's modification time.
    """
    remote_key = bucket.get_key(s3_key_name)
    if not remote_key:
        return False

    remote_size = remote_key.size
    local_size = os.path.getsize(transfer_file)
    remote_mtime = rfc822.mktime_tz(
        rfc822.parsedate_tz(remote_key.last_modified))
    local_mtime = os.path.getmtime(transfer_file)
    return remote_size == local_size and remote_mtime >= local_mtime
Ejemplo n.º 54
0
def from_internaldate(date):
    """Convert a date string to the value produced by gmtime.mkgmtime().

    The string is parsed with rfc822.parsedate_tz() and the result passed
    to gmtime.mkgmtime().  If either step raises, 0 is returned instead.
    """
    try:
        return gmtime.mkgmtime(rfc822.parsedate_tz(date))
    except Exception:
        # Deliberate best effort for bad input, but without swallowing
        # KeyboardInterrupt/SystemExit as a bare except would.
        return 0
Ejemplo n.º 55
0
 def got_contents_to_filename(response):
     """Close the output file, restore its timestamp, then notify the caller.

     When ``self.last_modified`` is set, the file's access and modification
     times are set from it; any failure while doing so is ignored.
     ``callback`` is invoked with ``response`` if it is callable.
     """
     fp.close()
     # if last_modified date was sent from s3, try to set file's timestamp
     if self.last_modified is not None:
         try:
             stamp = int(rfc822.mktime_tz(
                 rfc822.parsedate_tz(self.last_modified)))
             os.utime(fp.name, (stamp, stamp))
         except Exception:
             pass
     if callable(callback):
         callback(response)
Ejemplo n.º 56
0
def _parse_date(value):
    if not value:
        return None
    t = parsedate_tz(value)
    if t is None:
        # Could not parse
        return None
    if t[-1] is None:
        # No timezone given.  None would mean local time, but we'll force UTC
        t = t[:9] + (0, )
    t = mktime_tz(t)
    return datetime.fromtimestamp(t, UTC)