def write(self, rev, time, page, author, minor, upload):
    # Encode the boolean attributes as a bitfield.
    flags = 0
    if minor:
        flags += 1
    if author.isip:
        flags += 2
    if author.isdel:
        flags += 4
    if upload:
        flags += 8
    # The 128-bit author id is split into two 64-bit halves for packing.
    data = self.struct.pack(
        rev,
        timegm(time.utctimetuple()),
        page,
        (author.id >> 64) & MAX_INT64,
        author.id & MAX_INT64,
        flags
    )
    # Records are fixed-width, so a revision's offset is rev * record size.
    self.fh.seek(rev * self.struct.size, os.SEEK_SET)
    self.fh.write(data)
def _to_timestamp(time_string):
    # Parse a local-time string, attach the app's configured timezone,
    # convert to UTC, and return a Unix timestamp.
    import calendar
    utc = pytz.timezone("UTC")
    time = datetime.strptime(time_string, "%d.%m.%Y %H:%M")
    time = pytz.timezone(app.config['APP_TZ']).localize(time)
    time = utc.normalize(time.astimezone(utc))
    return calendar.timegm(time.utctimetuple())
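# A minimal, self-contained usage sketch of the conversion above (not from
# the source): the Flask-style app.config['APP_TZ'] lookup is replaced by a
# hardcoded zone name, and "Europe/Berlin" is an illustrative assumption.
import calendar
from datetime import datetime
import pytz

def to_timestamp_demo(time_string, tz_name="Europe/Berlin"):
    naive = datetime.strptime(time_string, "%d.%m.%Y %H:%M")
    local = pytz.timezone(tz_name).localize(naive)
    return calendar.timegm(local.astimezone(pytz.utc).utctimetuple())

# to_timestamp_demo("01.06.2021 12:00") == 1622541600  (12:00 CEST -> 10:00 UTC)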
def write(self, rev, time, page, author, minor):
    flags = 0
    if minor:
        flags += 1
    if author.isip:
        flags += 2
    if author.isdel:
        flags += 4
    data = self.struct.pack(rev, timegm(time.utctimetuple()), page, author.id, flags)
    self.fh.seek(rev * self.struct.size)
    self.fh.write(data)
    if self.maxrev < rev:
        self.maxrev = rev
def write(self, rev, time, page, author, minor):
    flags = 0
    if minor:
        flags += 1
    if author.isip:
        flags += 2
    if author.isdel:
        flags += 4
    # Split the 128-bit author id into two unsigned 64-bit halves.
    data = self.struct.pack(
        rev,
        timegm(time.utctimetuple()),
        page,
        (author.id >> 64) & MAX_INT64,
        author.id & MAX_INT64,
        flags
    )
    self.maxrev = max(self.maxrev, rev)
    self.fh.seek(rev * self.struct.size, os.SEEK_SET)
    self.fh.write(data)
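# Hedged companion sketch (an assumption, not code from the source): how a
# reader might unpack one fixed-width record written by the write() variants
# above and decode the flags bitfield. The ">IIIQQB" layout is a guess at six
# fields matching the pack() call; the real format lives in self.struct.
import struct

RECORD = struct.Struct(">IIIQQB")  # rev, unix time, page, id_hi, id_lo, flags

def read_record(fh, rev):
    fh.seek(rev * RECORD.size)
    r, ts, page, id_hi, id_lo, flags = RECORD.unpack(fh.read(RECORD.size))
    return {
        "rev": r,
        "timestamp": ts,
        "page": page,
        "author_id": (id_hi << 64) | id_lo,  # reassemble the 128-bit id
        "minor": bool(flags & 1),
        "isip": bool(flags & 2),
        "isdel": bool(flags & 4),
    }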
def put_float_data(self, sensor, value, time):
    with self.lock:
        conn = self.get_conn()
        c = conn.cursor()
        # Store the timestamp as Unix epoch seconds (UTC).
        c.execute(
            "insert into sensor_float_data(val, timestamp, sensor_id) values( ?, ?, ? )",
            (value, calendar.timegm(time.utctimetuple()), sensor)
        )
        conn.commit()
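# Self-contained sketch of the same insert pattern (assumption: the backing
# store is sqlite3, as the "?" placeholders suggest), runnable against an
# in-memory database with made-up sample values.
import calendar
import sqlite3
from datetime import datetime

conn = sqlite3.connect(":memory:")
conn.execute("create table sensor_float_data (val real, timestamp integer, sensor_id integer)")
conn.execute(
    "insert into sensor_float_data(val, timestamp, sensor_id) values (?, ?, ?)",
    (21.5, calendar.timegm(datetime.utcnow().utctimetuple()), 1),
)
conn.commit()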
def _getInMillis(self, time):
    # Epoch milliseconds: whole seconds * 1000, plus the microsecond
    # component floored to milliseconds.
    rval = int(calendar.timegm(time.utctimetuple()) * 1000)
    rval += time.microsecond // 1000
    return rval
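# Quick check of the millisecond math above (not from the source): timegm()
# reads the struct_time as UTC, so a naive-UTC datetime round-trips exactly,
# with microseconds floored to milliseconds.
import calendar
from datetime import datetime

dt = datetime(2021, 6, 1, 10, 0, 0, 123456)
millis = int(calendar.timegm(dt.utctimetuple()) * 1000) + dt.microsecond // 1000
assert millis == 1622541600123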
url_future = {executor.submit(load_url, link): link for link in urls}
for future in concurrent.futures.as_completed(url_future):
    this_id = url_future[future][0]
    try:
        url = url_future[future]
        response = future.result()
        soup = BeautifulSoup(response, "lxml")
        # Loop through all URLs and extract
        data = {}
        md5_id = hashlib.md5(url.encode('utf-8')).hexdigest()
        time = datetime.utcnow()
        unixtime = calendar.timegm(time.utctimetuple())
        last_crawled = datetime.utcfromtimestamp(unixtime).isoformat() + "Z"
        # Skip non-HTML resources; record only the bare metadata.
        if "pdf" in url or "404" in url or ".js" in url or ".css" in url:
            data["url"] = url
            data["id"] = md5_id
            data["title"] = ""
            data["body"] = ""
            data["description"] = ""
            data["keywords"] = ""
            data["last_crawled"] = last_crawled
            continue
        # Extract Body