Beispiel #1
0
    def write(self, rev, time, page, author, minor, upload):
        """Pack one revision record and write it at the slot indexed by ``rev``.

        The flag field is a bit set: 1 when ``minor`` is truthy, 2 when
        ``author.isip`` is truthy, 4 when ``author.isdel`` is truthy and 8
        when ``upload`` is truthy.  ``author.id`` is stored as two values:
        the bits above position 64 and the low bits, each masked with
        ``MAX_INT64``.

        ``time`` must provide ``utctimetuple()``; its result is converted to
        epoch seconds with ``timegm``.  The record is written at byte offset
        ``rev * self.struct.size`` of ``self.fh``.
        """
        # The bit values are disjoint, so OR-ing them equals adding them up.
        flags = (
            (1 if minor else 0)
            | (2 if author.isip else 0)
            | (4 if author.isdel else 0)
            | (8 if upload else 0)
        )

        epoch_seconds = timegm(time.utctimetuple())
        id_high = (author.id >> 64) & MAX_INT64
        id_low = author.id & MAX_INT64
        record = self.struct.pack(rev, epoch_seconds, page, id_high, id_low, flags)

        # Each revision owns one slot, addressed from the start of the file.
        offset = rev * self.struct.size
        self.fh.seek(offset, os.SEEK_SET)
        self.fh.write(record)
Beispiel #2
0
def _to_timestamp(time_string):
    """Convert a ``"%d.%m.%Y %H:%M"`` wall-clock string to epoch seconds.

    The parsed value is localized to the zone named by
    ``app.config['APP_TZ']``, converted to UTC, and returned as an integer
    number of seconds via ``calendar.timegm``.
    """
    import calendar

    utc_zone = pytz.timezone("UTC")
    naive_local = datetime.strptime(time_string, "%d.%m.%Y %H:%M")
    local_zone = pytz.timezone(app.config['APP_TZ'])
    aware_local = local_zone.localize(naive_local)
    aware_utc = utc_zone.normalize(aware_local.astimezone(utc_zone))
    return calendar.timegm(aware_utc.utctimetuple())
Beispiel #3
0
 def write(self, rev, time, page, author, minor):
     """Pack one revision record, write it at slot ``rev``, track ``maxrev``.

     The flag field is a bit set: 1 when ``minor`` is truthy, 2 when
     ``author.isip`` is truthy and 4 when ``author.isdel`` is truthy.
     ``time`` must provide ``utctimetuple()``; it is stored as epoch
     seconds.  The record lands at byte offset ``rev * self.struct.size``
     of ``self.fh``, after which ``self.maxrev`` holds the larger of its
     previous value and ``rev``.
     """
     # The bit values are disjoint, so OR-ing them equals adding them up.
     flags = (
         (1 if minor else 0)
         | (2 if author.isip else 0)
         | (4 if author.isdel else 0)
     )
     epoch_seconds = timegm(time.utctimetuple())
     record = self.struct.pack(rev, epoch_seconds, page, author.id, flags)

     offset = rev * self.struct.size
     self.fh.seek(offset)
     self.fh.write(record)

     self.maxrev = max(self.maxrev, rev)
Beispiel #4
0
	def write(self, rev, time, page, author, minor):
		"""Store the record for revision ``rev`` and keep ``maxrev`` current.

		Flag bits: 1 for a truthy ``minor``, 2 for a truthy ``author.isip``,
		4 for a truthy ``author.isdel``.  ``time`` must provide
		``utctimetuple()`` and is stored as epoch seconds.  The packed
		record is written to ``self.fh`` at byte offset
		``rev * self.struct.size``; ``self.maxrev`` is raised to ``rev``
		when ``rev`` is larger.
		"""
		flags = 0
		if minor:
			flags |= 1
		if author.isip:
			flags |= 2
		if author.isdel:
			flags |= 4

		epoch_seconds = timegm(time.utctimetuple())
		record = self.struct.pack(rev, epoch_seconds, page, author.id, flags)

		slot_offset = rev * self.struct.size
		self.fh.seek(slot_offset)
		self.fh.write(record)

		if rev > self.maxrev:
			self.maxrev = rev
Beispiel #5
0
    def write(self, rev, time, page, author, minor):
        """Pack one revision record, bump ``maxrev``, write it at slot ``rev``.

        The flag field is a bit set: 1 when ``minor`` is truthy, 2 when
        ``author.isip`` is truthy and 4 when ``author.isdel`` is truthy.
        ``author.id`` is stored as two values: the bits above position 64
        and the low bits, each masked with ``MAX_INT64``.

        ``time`` must provide ``utctimetuple()``; it is stored as epoch
        seconds.  ``self.maxrev`` is updated after packing and before the
        record is written at byte offset ``rev * self.struct.size``.
        """
        # The bit values are disjoint, so OR-ing them equals adding them up.
        flags = (
            (1 if minor else 0)
            | (2 if author.isip else 0)
            | (4 if author.isdel else 0)
        )

        epoch_seconds = timegm(time.utctimetuple())
        id_high = (author.id >> 64) & MAX_INT64
        id_low = author.id & MAX_INT64
        record = self.struct.pack(rev, epoch_seconds, page, id_high, id_low, flags)

        if rev > self.maxrev:
            self.maxrev = rev

        # Each revision owns one slot, addressed from the start of the file.
        offset = rev * self.struct.size
        self.fh.seek(offset, os.SEEK_SET)
        self.fh.write(record)
Beispiel #6
0
 def put_float_data(self, sensor, value, time):
     """Insert one reading into ``sensor_float_data`` and commit it.

     ``value`` goes to the ``val`` column, ``sensor`` to ``sensor_id``, and
     ``time`` (which must provide ``utctimetuple()``) is stored in
     ``timestamp`` as epoch seconds.  The whole operation runs while
     holding ``self.lock``.
     """
     statement = "insert into sensor_float_data(val, timestamp, sensor_id) values( ?, ?, ? )"
     with self.lock:
         connection = self.get_conn()
         cursor = connection.cursor()
         epoch_seconds = calendar.timegm(time.utctimetuple())
         cursor.execute(statement, (value, epoch_seconds, sensor))
         connection.commit()
Beispiel #7
0
 def _getInMillis(self, time):
     """Return ``time`` as integer milliseconds since the Unix epoch.

     ``time`` must provide ``utctimetuple()`` and ``microsecond``.  The
     sub-millisecond part of ``microsecond`` is dropped by floor division.
     """
     whole_seconds = calendar.timegm(time.utctimetuple())
     extra_millis = time.microsecond // 1000
     return int(whole_seconds * 1000) + extra_millis
Beispiel #8
0
 def _getInMillis(self, time):
     """Convert ``time`` to whole milliseconds since the Unix epoch.

     Seconds come from ``calendar.timegm`` applied to
     ``time.utctimetuple()``; the millisecond part is
     ``time.microsecond`` floor-divided by 1000.
     """
     epoch_ms = 1000 * calendar.timegm(time.utctimetuple())
     return int(epoch_ms) + time.microsecond // 1000
    url_future = {executor.submit(load_url, link): link for link in urls}
    for future in concurrent.futures.as_completed(url_future):
        this_id = url_future[future][0]
        try:
            # print(url_future[future],"\n\n\n")
            url = url_future[future]
            response = future.result()
            soup = BeautifulSoup(response, "lxml")

            # Loop through all URLS ans extarct
            data = {}

            md5_id = hashlib.md5(url.encode('utf-8')).hexdigest()

            time = datetime.utcnow()
            unixtime = calendar.timegm(time.utctimetuple())
            last_crawled = datetime.utcfromtimestamp(
                (unixtime)).isoformat() + "Z"

            if "pdf" in url or "404" in url or ".js" in url or ".css" in url:
                data["url"] = url
                data["id"] = md5_id
                data["title"] = ""
                data["body"] = ""
                data["description"] = ""
                data["keywords"] = ""
                data["last_crawled"] = last_crawled
                continue

            # Extract Body