def most_recent_ami(ami_name, filterList):
    # Each entry looks like 'Name=<name>,Values=<v1>,<v2>'; split it into the
    # boto3 filter dict form.
    filters = []
    for filter_spec in filterList:  # don't shadow the builtin `filter`
        name, _, values = filter_spec.partition(',')
        _, _, name = name.partition('=')
        _, _, values = values.partition('=')
        filters.append({
            'Name': name.strip(),
            # boto3 needs a real list here; a lazy map() object breaks on
            # Python 3.
            'Values': [x.strip() for x in values.split(',')],
        })
    ec2 = boto3.resource('ec2')
    most_recent_image = None
    for image in ec2.images.filter(Filters=filters):
        if most_recent_image is None:
            most_recent_image = image
            continue
        if parse_dt(image.creation_date) > parse_dt(
                most_recent_image.creation_date):
            most_recent_image = image
    if most_recent_image is None:
        sys.stderr.write('No images matched filters provided.\n')
        raise SystemExit(1)
    sys.stderr.write('{0}: {1.id}{2}'.format(ami_name, most_recent_image, '\n'))
    return '{0.id}'.format(most_recent_image)
def _check_feed(self, browse_url, rss_url, sort='featured'):
    """
    Check RSS feed URLs and that the results on the listing pages match
    those for their respective RSS feeds.
    """
    # Check URLs.
    r = self.client.get(browse_url, follow=True)
    doc = pq(r.content)
    rss_url += '?sort=%s' % sort
    eq_(doc('link[type="application/rss+xml"]').attr('href'), rss_url)
    eq_(doc('#subscribe').attr('href'), rss_url)

    # Ensure that the RSS items match those on the browse listing pages.
    r = self.client.get(rss_url)
    rss_doc = pq(r.content)
    pg_items = doc('.items .item')
    rss_items = rss_doc('item')
    for pg_item, rss_item in zip(pg_items, rss_items):
        pg_item, rss_item = pq(pg_item), pq(rss_item)
        pg_url = absolutify(pg_item.find('h3 a').attr('href'))
        rss_url = rss_item.find('link').text()
        abs_url = pg_url.split('?')[0]
        assert rss_url.endswith(abs_url), 'Unexpected URL: %s' % abs_url
        if sort in ('added', 'updated'):
            # Check timestamps.
            pg_ts = pg_item.find('.updated').text().strip('Added Updated')
            rss_ts = rss_item.find('pubDate').text()
            # Look at YMD, since we don't have h:m on listing pages.
            eq_(parse_dt(pg_ts).isocalendar(), parse_dt(rss_ts).isocalendar())
def _check_feed(self, browse_url, rss_url, sort='featured'):
    """
    Check RSS feed URLs and that the results on the listing pages match
    those for their respective RSS feeds.
    """
    # Check URLs.
    response = self.client.get(browse_url, follow=True)
    doc = pq(response.content)
    rss_url += '?sort=%s' % sort
    assert doc('link[type="application/rss+xml"]').attr('href') == rss_url
    assert doc('#subscribe').attr('href') == rss_url

    # Ensure that the RSS items match those on the browse listing pages.
    response = self.client.get(rss_url)
    rss_doc = pq(response.content)
    pg_items = doc('.items .item')
    rss_items = rss_doc('item')

    # We have to set `parser=xml` because of
    # https://github.com/gawel/pyquery/issues/93
    items_urls = zip(
        sorted((absolutify(pq(x).find('h3 a').attr('href')), pq(x))
               for x in pg_items),
        sorted((pq(x).find('link').text(), pq(x, parser='xml'))
               for x in rss_items))
    for (pg_url, pg_item), (rss_url, rss_item) in items_urls:
        abs_url = pg_url.split('?')[0]
        assert rss_url.endswith(abs_url), 'Unexpected URL: %s' % abs_url
        if sort in ('added', 'updated'):
            # Check timestamps.
            pg_ts = pg_item.find('.updated').text().strip('Added Updated')
            rss_ts = rss_item.find('pubDate').text()
            # Look at YMD, since we don't have h:m on listing pages.
            assert parse_dt(pg_ts).isocalendar() == (
                parse_dt(rss_ts).isocalendar())
def temporal_from_literal(text):
    '''
    Parse a temporal coverage from a literal, i.e. either:
    - an ISO date range
    - a single ISO date period (month, year)
    '''
    if text.count('/') == 1:
        # This is an ISO date range as recommended by Gov.uk
        # http://guidance.data.gov.uk/dcat_fields.html
        start, end = text.split('/')
        return db.DateRange(
            start=parse_dt(start).date(),
            end=parse_dt(end).date()
        )
    else:
        separators = text.count('-')
        if separators == 0:  # this is a year
            return db.DateRange(
                start=date(int(text), 1, 1),
                end=date(int(text), 12, 31)
            )
        elif separators == 1:  # this is a month
            dt = parse_dt(text).date()
            return db.DateRange(
                start=dt.replace(day=1),
                end=dt.replace(day=calendar.monthrange(dt.year, dt.month)[1])
            )
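# The non-obvious step above is deriving the last day of the month; a quick
# standalone check of that logic (calendar.monthrange returns (weekday of the
# 1st, number of days in the month)):
import calendar
from datetime import date
from dateutil.parser import parse as parse_dt

dt = parse_dt('2024-02-15').date()
last_day = calendar.monthrange(dt.year, dt.month)[1]  # 29 (leap year)
assert (dt.replace(day=1), dt.replace(day=last_day)) == \
    (date(2024, 2, 1), date(2024, 2, 29))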
def date_between(start_date, end_date, format):
    start_date = parse_dt(start_date)
    end_date = parse_dt(end_date)
    diff = int((start_date - end_date).days)
    func = date_after if end_date > start_date else date_before
    return func(start_date, format, diff, False)
def get_most_recent_ami_id(ec2, application_name, application_role,
                           application_profile, os_type):
    """
    Given a set of criteria, retrieve the latest AMI ID from AWS using the
    EC2 service.
    """
    filters = [{
        'Name': 'tag:Application Role',
        'Values': [application_role]
    }, {
        'Name': 'tag:Application Profile',
        'Values': [application_profile]
    }, {
        'Name': 'tag:OS Type',
        'Values': [os_type]
    }]
    most_recent_image = None
    for image in ec2.images.filter(Filters=filters):
        if most_recent_image is None:
            most_recent_image = image
            continue
        if parse_dt(image.creation_date) > parse_dt(
                most_recent_image.creation_date):
            most_recent_image = image
    if most_recent_image is not None:
        sys.stderr.write('{0}: {1.id}{2}'.format(application_name,
                                                 most_recent_image, '\n'))
        most_recent_ami_id = '{0.id}'.format(most_recent_image)
    else:
        most_recent_ami_id = None
    return most_recent_ami_id
def check_time_extents(self, ds):
    """
    Check that the values of time_coverage_start/time_coverage_end
    approximately match the data.
    """
    if not (hasattr(ds.dataset, 'time_coverage_start') and
            hasattr(ds.dataset, 'time_coverage_end')):
        return

    epoch = parse_dt("1970-01-01 00:00:00 UTC")
    t_min = (parse_dt(ds.dataset.time_coverage_start) - epoch).total_seconds()
    t_max = (parse_dt(ds.dataset.time_coverage_end) - epoch).total_seconds()

    # identify t vars as per CF 4.4
    t_vars = [var for name, var in ds.dataset.variables.iteritems()
              if is_time_variable(name, var)]

    if len(t_vars) == 0:
        return Result(BaseCheck.MEDIUM,
                      False,
                      'time_coverage_extents_match',
                      'Could not find time variable to test extent of '
                      'time_coverage_start/time_coverage_end, see CF-1.6 '
                      'spec chapter 4.4')

    obs_mins = {var._name: Unit(str(var.units)).get_converter(
        "seconds since 1970-01-01").evaluate(np.nanmin(var))
        for var in t_vars}
    obs_maxs = {var._name: Unit(str(var.units)).get_converter(
        "seconds since 1970-01-01").evaluate(np.nanmax(var))
        for var in t_vars}

    min_pass = any(np.isclose(t_min, min_val)
                   for min_val in obs_mins.itervalues())
    max_pass = any(np.isclose(t_max, max_val)
                   for max_val in obs_maxs.itervalues())

    allpass = sum((min_pass, max_pass))

    msgs = []
    if not min_pass:
        msgs.append("Data for possible time variables (%s) did not match "
                    "time_coverage_start value (%s)" % (obs_mins, t_min))
    if not max_pass:
        msgs.append("Data for possible time variables (%s) did not match "
                    "time_coverage_end value (%s)" % (obs_maxs, t_max))

    return Result(BaseCheck.MEDIUM,
                  (allpass, 2),
                  'time_coverage_extents_match',
                  msgs)
def resource_time(r):
    from dateutil.relativedelta import relativedelta
    from dateutil.parser import parse as parse_dt
    if r.get_value('year'):
        t = r.get_value('year')
        # Use the plural keywords: relativedelta(years=1) is a one-year span,
        # whereas the singular year=1 would set the year *to* 1 absolutely.
        return parse_dt(t + '0101'), relativedelta(years=1)
    elif r.get_value('month'):
        t = r.get_value('month')
        return parse_dt(t + '01'), relativedelta(months=1)
    return None, None
def collect_cards_internal(requester, board, board_members, checklists,
                           lists, card_status):
    collected_cards = []
    last_card_id = None
    while True:
        filters = {'filter': 'all', 'fields': 'all', 'limit': '1000'}
        if last_card_id:
            # The Trello api supports paging by using the id of the last card
            # in the previous batch as the 'before' parameter.
            filters['before'] = last_card_id
        cards = board.get_cards(filters=filters, card_filter=card_status)
        for card in cards:
            db_card, created = Document.objects.get_or_create(
                trello_board_id=board.id,
                trello_card_id=card.id,
                requester=requester,
                user_id=requester.id
            )
            card_last_activity = card.raw.get('dateLastActivity')
            last_activity = parse_dt(card_last_activity).isoformat()
            last_activity_ts = int(parse_dt(card_last_activity).timestamp())
            collected_cards.append(card)
            if not created and db_card.last_updated_ts and \
                    db_card.last_updated_ts >= last_activity_ts:
                logger.debug("Trello card '%s' for user '%s' hasn't changed",
                             card.name[:50], requester.username)
                continue
            logger.debug("Processing card '%s' for user '%s'",
                         card.name[:50], requester.username)
            db_card.primary_keywords = TRELLO_PRIMARY_KEYWORDS
            db_card.secondary_keywords = TRELLO_SECONDARY_KEYWORDS['card']
            db_card.last_updated = last_activity
            db_card.last_updated_ts = last_activity_ts
            db_card.trello_title = 'Card: {}'.format(card.name)
            db_card.webview_link = card.url
            db_card.trello_content = {
                'description': _to_html(card.description),
                'checklists': [
                    {'id': cl.id, 'name': cl.name, 'items': cl.items}
                    for cl in checklists[card.id]
                ]
            }
            db_card.trello_card_status = 'Archived' if card.closed else 'Open'
            db_card.trello_card_members = [
                board_members.get(m) for m in card.idMembers
                if m in board_members
            ]
            db_card.trello_board_name = board.name
            db_card.trello_list = lists.get(card.idList)
            db_card.last_synced = get_utc_timestamp()
            db_card.download_status = Document.READY
            db_card.save()
            algolia_engine.sync(db_card, add=created)
        # `card` still refers to the last card of the batch here, so paging
        # advances even when cards in the batch were skipped as unchanged.
        last_card_id = card.id
        if len(cards) < 1000:
            break
    return collected_cards
def command_tvrage(bot, user, channel, args):
    """Fetch episode information from tvrage."""
    if not args:
        return bot.say(channel, "I need a show to search for!")

    show_id = find_series(args)
    if show_id is None:
        return bot.say(channel, "Series not found.")

    r = requests.get("http://services.tvrage.com/feeds/episodeinfo.php",
                     params={"sid": show_id})
    show = ET.fromstring(r.content)
    if show is None:
        return bot.say(channel, "Series not found.")

    now = datetime.now().replace(tzinfo=tz.tzlocal())
    name = show.find("name").text
    link = show.find("link").text

    next_episode = show.find("nextepisode")
    if next_episode is not None:
        number = next_episode.find("number").text
        title = next_episode.find("title").text
        airtime = parse_dt(
            next_episode.find('airtime[@format="RFC3339"]').text)
        if airtime < now:
            return bot.say(
                channel,
                '%s - %s - "%s" aired %s ago <%s>'
                % (name, number, title, now - airtime, link),
            )
        return bot.say(
            channel,
            '%s - %s - "%s" airs in %s <%s>'
            % (name, number, title, airtime - now, link),
        )

    latest_episode = show.find("latestepisode")
    if latest_episode is None:
        return bot.say(channel,
                       'No episode information for "%s" <%s>' % (name, link))

    number = latest_episode.find("number").text
    title = latest_episode.find("title").text
    airtime = parse_dt(
        latest_episode.find("airdate").text).replace(tzinfo=tz.tzlocal())
    return bot.say(
        channel,
        '%s - %s - "%s" aired %s ago <%s>'
        % (name, number, title, now - airtime, link),
    )
def extract_period(text: str, meter_number: str) -> Optional[Tuple[date, date]]:
    period_regex = (r"%s([A-Z][a-z]{2} \d+, \d{4})([A-Z][a-z]{2} \d+, \d{4})"
                    % meter_number)
    matches = re.search(period_regex, text)
    if not matches:
        return None
    try:
        start = parse_dt(matches.group(1)).date()
        end = parse_dt(matches.group(2)).date()
        return start, end
    except ValueError:
        return None
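# A hedged usage sketch: the meter number and the run-together text below are
# made up to mimic the PDF-extracted layout the regex above expects.
sample = 'MTR-001Jan 1, 2020Feb 1, 2020'
assert extract_period(sample, 'MTR-001') == (date(2020, 1, 1), date(2020, 2, 1))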
def get_datasources(self):
    datasource_arr = []
    for ds_name in os.listdir(self.dir):
        try:
            with open(os.path.join(self.dir, ds_name, 'metadata.json'), 'r') as fp:
                try:
                    datasource = json.load(fp)
                    # Drop fractional seconds before parsing.
                    datasource['created_at'] = parse_dt(
                        datasource['created_at'].split('.')[0])
                    datasource['updated_at'] = parse_dt(
                        datasource['updated_at'].split('.')[0])
                    datasource_arr.append(datasource)
                except Exception as e:
                    print(e)
        except Exception as e:
            print(e)
    return datasource_arr
def _convert_data(self, data):
    if isinstance(data, list):
        return [self._convert_data(a) for a in data]
    elif isinstance(data, dict):
        dictionary = {inflection.underscore(k): self._convert_data(v)
                      for (k, v) in data.items()}
        for k, v in dictionary.items():
            try:
                if "datetime" in k and type(v) == str:
                    dictionary[k] = parse_dt(v)
                elif "date" in k and type(v) == str:
                    dictionary[k] = parse_dt(v).date()
            except ValueError:
                # Malformed datetimes:
                dictionary[k] = None
        return dictionary
    else:
        return data
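# Assuming the surrounding class can be instantiated (Converter is a made-up
# name), the key-name heuristic works like this: keys containing "datetime"
# become datetimes, keys containing only "date" become dates, and everything
# else passes through unchanged.
payload = {'createdDatetime': '2024-01-02T03:04:05',
           'dueDate': '2024-01-31',
           'note': 'x'}
converted = Converter()._convert_data(payload)
# -> {'created_datetime': datetime(2024, 1, 2, 3, 4, 5),
#     'due_date': date(2024, 1, 31),
#     'note': 'x'}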
def iter_videos(self):
    for div in self.parser.select(self.document.getroot(), 'article.rs-cell'):
        title = self.parser.select(div, 'h3 a', 1)
        url = title.attrib['href']
        m = re.match('^http://pluzz.francetv.fr/videos/(.+).html$', url)
        if not m:
            self.logger.debug('url %s does not match' % url)
            continue
        _id = m.group(1)

        video = PluzzVideo(_id)
        video.title = unicode(title.text.strip())
        for p in div.xpath('.//p[@class="bientot"]'):
            video.title += ' - %s' % p.text.split('|')[0].strip()
        video.date = parse_dt(div.find('span').attrib['data-date'])

        duration = div.xpath('.//span[@class="type-duree"]')[0].text.split('|')[1].strip()
        if duration[-1:] == "'":
            t = [0, int(duration[:-1])]
        else:
            t = map(int, duration.split(':'))
        video.duration = datetime.timedelta(hours=t[0], minutes=t[1])

        url = self.parser.select(div, 'a.vignette img', 1).attrib['src']
        video.thumbnail = Thumbnail(url)

        yield video
def _process_customer(requester, customer, mailboxes, folders, users):
    if customer.id is None or (customer.emails is None and
                               customer.fullname is None):
        # can't use a customer with no data
        logger.debug("Customer '%s' for user '%s' cannot be used - no data",
                     (customer.id or customer.fullname), requester.username)
        return
    db_customer, created = Document.objects.get_or_create(
        helpscout_customer_id=customer.id,
        requester=requester,
        user_id=requester.id)
    db_customer.helpscout_name = customer.fullname
    logger.debug("Processing Helpscout customer '%s' for user '%s'",
                 customer.fullname, requester.username)
    new_updated = customer.modifiedat
    new_updated_ts = parse_dt(new_updated).timestamp()
    if not created and db_customer.last_updated_ts:
        new_updated_ts = db_customer.last_updated_ts \
            if db_customer.last_updated_ts > new_updated_ts else new_updated_ts
    db_customer.last_updated = datetime.utcfromtimestamp(
        new_updated_ts).isoformat() + 'Z'
    db_customer.last_updated_ts = new_updated_ts
    db_customer.helpscout_title = 'User: {}'.format(customer.fullname)
    db_customer.webview_link = 'https://secure.helpscout.net/customer/{}/0/'.format(
        customer.id)
    db_customer.primary_keywords = HELPSCOUT_KEYWORDS['primary']
    db_customer.secondary_keywords = HELPSCOUT_KEYWORDS['secondary']
    db_customer.helpscout_company = customer.organization
    db_customer.helpscout_emails = ', '.join(
        e.get('value') for e in customer.emails
        if 'value' in e) if customer.emails else None
    db_customer.save()
    algolia_engine.sync(db_customer, add=created)
    subtask(process_customer).delay(requester, db_customer, mailboxes,
                                    folders, users)
def __init__(self, timestamp, point):
    self.time = parse_dt(timestamp)
    self.temperature = point.get('temperature', None)
    self.wind_speed = point.get('wind_speed', None)
    self.wind_gust = point.get('wind_gust', None)
    self.wind_direction = point.get('wind_direction', None)
    self.humidity = point.get('humidity', None)
    self.cloud_coverage = point.get('cloud_coverage', None)
    self.pressure = point.get('pressure', None)
    self.dew_point = point.get('dew_point', None)
    self.precipitation = point.get('precipitation', None)
    self.precipitation_1h = point.get('precipitation_1h', None)
    self.radiation_global_accumulation = point.get(
        'radiation_global_accumulation', None)
    self.radiation_long_wave_accumulation = point.get(
        'radiation_long_wave_accumulation', None)
    self.radiation_netsurface_long_wave_accumulation = point.get(
        'radiation_netsurface_long_wave_accumulation', None)
    self.radiation_netsurface_short_wave_accumulation = point.get(
        'radiation_netsurface_short_wave_accumulation', None)
    self.radiation_diffuse_accumulation = point.get(
        'radiation_diffuse_accumulation', None)
    try:
        self.weather_symbol = int(point.get('weather_symbol', 0))
    except ValueError:
        self.weather_symbol = None
def iter_videos(self):
    for div in self.parser.select(self.document.getroot(), 'article.rs-cell'):
        title = self.parser.select(div, 'h3 a', 1)
        url = title.attrib['href']
        m = re.match('^http://pluzz.francetv.fr/videos/(.+).html$', url)
        if not m:
            self.logger.debug('url %s does not match' % url)
            continue
        _id = m.group(1)

        video = PluzzVideo(_id)
        video.title = unicode(title.text.strip())
        for p in div.xpath('.//p[@class="bientot"]'):
            video.title += ' - %s' % p.text.split('|')[0].strip()

        date = div.xpath('.//p[@class="diffusion"]')[0].text.split('|')[0].strip()
        pattern = re.compile(r'(\d{2}-\d{2}-\d{2})(.*?)(\d{2}:\d{2})')
        match = pattern.search(date)
        if match:
            video.date = parse_dt("%s %s" % (match.group(1), match.group(3)))

        duration = div.xpath('.//span[@class="type-duree"]')[0].text.split('|')[1].strip()
        if duration[-1:] == "'":
            t = [0, int(duration[:-1])]
        else:
            t = map(int, duration.split(':'))
        video.duration = datetime.timedelta(hours=t[0], minutes=t[1])

        url = self.parser.select(div, 'a.vignette img', 1).attrib['src']
        video.thumbnail = BaseImage(url)
        video.thumbnail.url = video.thumbnail.id

        yield video
def set_details(self, v):
    for li in self.parser.select(self.document.getroot(), 'ul.spaced li'):
        span = li.find('b')
        name = span.text.strip()
        value = span.tail.strip()
        if name == 'Duration:':
            m = re.match(r'((\d+)hrs)?\s*((\d+)min)?\s*((\d+)sec)?', value)
            if not m:
                raise BrokenPageError('Unable to parse datetime: %r' % value)
            hours = m.group(2) or 0
            minutes = m.group(4) or 0
            seconds = m.group(6) or 0
            v.duration = datetime.timedelta(hours=int(hours),
                                            minutes=int(minutes),
                                            seconds=int(seconds))
        elif name == 'Submitted:':
            author = li.find('i')
            if author is None:
                author = li.find('a')
            if author is None:
                v.author = unicode(value)
            else:
                v.author = unicode(author.text)
        elif name == 'Rating:':
            value = li.find('span').text
            v.rating = int(value.rstrip('%'))
            v.rating_max = 100
        elif name == 'Date:':
            v.date = parse_dt(value)
def get_time(link):
    res = self.session.get(link)
    sleep(self.PAGE_QUERY_INTERVAL)
    soup = BeautifulSoup(res.content, 'html.parser')
    time_ele = soup.select_one(
        'div#story div#shareBar div.shareBar__info--author span')
    return parse_dt(time_ele.contents[0])
def from_last_n_days(cls, N):
    response = requests.get(settings.FACEBOOK_GROUP_URL)
    response_body = response.json()
    result = []
    while response_body['data']:
        for offer_data in response_body['data']:
            dt = parse_dt(offer_data['updated_time'])
            if 'message' not in offer_data or (
                    timezone.now() - datetime.timedelta(days=N)) > dt:
                continue
            elements = offer_data['message'].split('\n')
            POSITION = 0
            position = elements[POSITION]
            lat_lng = position.split(':')[1]
            lat, lng = lat_lng.split(',')
            PRICE = 1
            price = elements[PRICE]
            price = price.split(':')[1]
            MESSAGE = 2
            message = elements[MESSAGE]
            message = message.split(':')[1]
            result.append(
                UnsavedOffer(lat.strip(), lng.strip(), price.strip(),
                             message.strip(), offer_data['id'].split('_')[1]))
        response = requests.get(response_body['paging']['next'])
        response_body = response.json()
    return result
def fetch(self, symbol):
    last_date = self.get_last_date_from_store(symbol)
    if last_date is None:
        logger.debug('last date for {} not found. using default of {}'
                     .format(symbol, self.first_date))
        last_date = parse_dt(self.first_date)
    else:
        logger.debug('date of last record for {} is {}'.format(symbol, last_date))
    self.validate_datetime_object(last_date)
    now = datetime.datetime.now(tz=pytz.UTC)
    if now - last_date < datetime.timedelta(hours=6):
        logger.debug('the time is now {}. it has not been 6 hours since {}. '
                     'not fetching anything.'.format(now.isoformat(), last_date))
        return {}
    first_fetch_date = last_date + datetime.timedelta(hours=6)
    query_data = dict(self.query_template)
    query_data['time_start'] = first_fetch_date
    query_data['limit'] = 1500  # just over one year of records @6hrs
    url = self.get_url(symbol, query_data)
    logger.debug('getting url {}'.format(url))
    response = requests.get(url, headers=self.headers)
    if response.status_code != 200:
        logger.error('request {} failed: {}'.format(url, response.reason))
        return {}
    logger.info('account has {} more API requests for this time period'.format(
        response.headers['X-RateLimit-Remaining']))
    data = response.json()
    # validate the FIRST date from the data returned. Not perfect, but will
    # prevent future heartache.
    self.validate_datetime_object(data[0][THE_DATETIME_FIELD])
    return data
def validate_params(self, url, parameters=None, tolerance=300):
    parameters = parameters or {}
    param = copy.copy(parameters)
    timestamp = param.get('timestamp', None)
    if not timestamp:
        return False
    timestamp_dt = parse_dt(timestamp)
    delta = datetime.utcnow() - DateTime(timestamp_dt).utcdatetime()
    # Use total_seconds(): timedelta.seconds ignores the days component, so a
    # days-old timestamp could otherwise slip past the tolerance window.
    if delta.total_seconds() > tolerance:
        return False
    auth_sig = param.get('auth_sig', None)
    if not auth_sig:
        return False
    del param['auth_sig']
    key_values = sorted(param.items(), key=lambda x: x[0])
    qs = urllib.urlencode(key_values)
    if '?' not in url:
        url += '?'
    sig = hmac.new(self.secret, url + qs, hashlib.sha1).hexdigest()
    return sig == auth_sig
def date_after_before(base_date, format, direction, limit, needs_parse=True):
    if needs_parse:
        base_date = parse_dt(base_date)
    days = rand_int(0, abs(limit))
    return arrow_api.get(direction(base_date, timedelta(days=days))).format(format)
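# A hedged usage sketch: `direction` is any two-argument callable combining a
# datetime and a timedelta (e.g. operator.add / operator.sub); `rand_int` and
# `arrow_api` are assumed to be a random.randint-style helper and the `arrow`
# module, neither shown in the snippet.
import operator
date_after_before('2024-01-01', 'YYYY-MM-DD', operator.add, 30)
# -> a date string 0-30 days after 2024-01-01, e.g. '2024-01-17'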
def set_details(self, v):
    for li in self.parser.select(self.document.getroot(), 'ul.spaced li'):
        span = li.find('label')
        name = span.text.strip()
        value = span.tail.strip()
        if name == 'Duration:':
            m = re.match(r'((\d+)hrs)?\s*((\d+)min)?\s*((\d+)sec)?', value)
            if not m:
                raise BrokenPageError('Unable to parse datetime: %r' % value)
            hours = m.group(2) or 0
            minutes = m.group(4) or 0
            seconds = m.group(6) or 0
            v.duration = datetime.timedelta(hours=int(hours),
                                            minutes=int(minutes),
                                            seconds=int(seconds))
        elif name == 'Submitted:':
            author = li.find('i')
            if author is None:
                author = li.find('a')
            if author is None:
                v.author = unicode(value)
            else:
                v.author = unicode(author.text)
        elif name == 'Rating:':
            value = li.find('span').text
            v.rating = int(value.rstrip('%'))
            v.rating_max = 100
        elif name == 'Date:':
            v.date = parse_dt(value)
def get_description(self, res, donation):
    donation_date = parse_dt(donation.data["donation_date"]).date()
    dates = set()
    for r in res:
        dates |= set(map(lambda x: parse_dt(x),
                         getattr(r.internals.flags, "bo_changes_dates", [])))
    from_dt = donation_date - self.change_minus_delta
    to_dt = donation_date + self.change_plus_delta
    dates = list(filter(lambda x: from_dt <= x.date() <= to_dt, dates))
    # Ukrainian: "The ownership structure changed {}", followed by the dates.
    return "Структура власності змінилася {}".format(
        ", ".join(map(date_filter, sorted(dates))))
def pullAsos(year, station, datatype):
    '''This model pulls hourly data for a specified year and ASOS station.

    * ASOS is the Automated Surface Observing System, a network of about 900
      weather stations; they collect data at hourly intervals, they're run by
      NWS, FAA, and DOD, and there is data going back to 1901 at some sites.
    * AKA METAR data, which is the name of the format it's stored in.
    * For ASOS station codes see https://www.faa.gov/air_traffic/weather/asos/
    * For datatypes see the bottom of
      https://mesonet.agron.iastate.edu/request/download.phtml
    * Note for USA stations (beginning with a K) you must NOT include the 'K'
    '''
    url = ('https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?'
           'station={}&data={}&year1={}&month1=1&day1=1&year2={}&month2=1&day2=1'
           '&tz=Etc%2FUTC&format=onlycomma&latlon=no&direct=no&report_type=1'
           '&report_type=2').format(station, datatype, year, int(year) + 1)
    r = requests.get(url)
    assert r.status_code != 404, "Dataset URL does not exist. " + url
    data = [x.split(',') for x in r.text.splitlines()[1:]]
    verifiedData = [-9999.0] * 8760
    firstDT = datetime(int(year), 1, 1)
    for r in data:
        if 'M' not in r:
            deltatime = parse_dt(r[1]) - firstDT
            verifiedData[int(deltatime.total_seconds() / 3600)] = float(r[2])
    return verifiedData
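# The slot index above is whole hours since Jan 1 of the requested year; a
# quick standalone sanity check of that arithmetic:
from datetime import datetime
firstDT = datetime(2023, 1, 1)
idx = int((datetime(2023, 1, 2, 5) - firstDT).total_seconds() / 3600)
assert idx == 29  # Jan 2, 05:00 is hour 24 + 5 of the year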
def need_check(self):
    '''Does the resource need to be checked against its linkchecker?

    We check unavailable resources often, unless they go over the threshold.
    Available resources are checked less and less frequently based on their
    historical availability.
    '''
    min_cache_duration, max_cache_duration, ko_threshold = [
        current_app.config.get(k) for k in (
            'LINKCHECKING_MIN_CACHE_DURATION',
            'LINKCHECKING_MAX_CACHE_DURATION',
            'LINKCHECKING_UNAVAILABLE_THRESHOLD',
        )
    ]
    count_availability = self.extras.get('check:count-availability', 1)
    is_available = self.check_availability()
    if is_available == 'unknown':
        return True
    elif is_available or count_availability > ko_threshold:
        delta = min(min_cache_duration * count_availability,
                    max_cache_duration)
    else:
        delta = min_cache_duration
    if self.extras.get('check:date'):
        limit_date = datetime.now() - timedelta(minutes=delta)
        check_date = self.extras['check:date']
        if not isinstance(check_date, datetime):
            try:
                check_date = parse_dt(check_date)
            except (ValueError, TypeError):
                return True
        if check_date >= limit_date:
            return False
    return True
def standardize_date(input):
    # Check datetime first: datetime.datetime is a subclass of datetime.date,
    # so the date branch would otherwise swallow datetimes too.
    if isinstance(input, datetime.datetime):
        return input.date().strftime("%Y%m%d")
    elif isinstance(input, datetime.date):
        return input.strftime("%Y%m%d")
    else:
        return parse_dt(input).date().strftime("%Y%m%d")
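# Quick checks of the three accepted input shapes:
import datetime
assert standardize_date(datetime.date(2024, 1, 31)) == '20240131'
assert standardize_date(datetime.datetime(2024, 1, 31, 12, 30)) == '20240131'
assert standardize_date('Jan 31, 2024') == '20240131'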
def populate_from_metadata(self, metadata_map):
    ts = None
    if 'exif:DateTime' in metadata_map:
        try:
            ts = Image.parse_exif_dt(metadata_map['exif:DateTime'])
        except Exception:
            self.add_tags('flag:invalid-exif-datetime')
    elif 'date:create' in metadata_map:
        try:
            ts = parse_dt(metadata_map['date:create'])
        except Exception:
            self.add_tags('flag:invalid-date-create')
        # In this branch the EXIF timestamp is missing either way.
        self.add_tags('flag:missing-exif-datetime')
    else:
        self.add_tags('flag:missing-exif-datetime', 'flag:missing-date-create')
    if ts is not None:
        self.create_timestamp = ts
        self.add_tags(
            'date:%d' % ts.year,
            'date:%02d-%d' % (ts.month, ts.year))
def get_current(self, city_id):
    dom = self._get_weather_dom(city_id)
    current = dom.getElementsByTagName('yweather:condition')[0]
    return Current(parse_dt(current.getAttribute('date')).date(),
                   float(current.getAttribute('temp')),
                   unicode(current.getAttribute('text')),
                   u'C')
def check_time_extents(self, ds):
    """
    Check that the values of time_coverage_start/time_coverage_end
    approximately match the data.
    """
    if not (hasattr(ds, "time_coverage_start") and
            hasattr(ds, "time_coverage_end")):
        return

    epoch = parse_dt("1970-01-01 00:00:00 UTC")
    t_min = (parse_dt(ds.time_coverage_start) - epoch).total_seconds()
    t_max = (parse_dt(ds.time_coverage_end) - epoch).total_seconds()

    # identify t vars as per CF 4.4
    t_vars = [var for name, var in ds.variables.items()
              if is_time_variable(name, var)]

    if len(t_vars) == 0:
        return Result(
            BaseCheck.MEDIUM,
            False,
            "time_coverage_extents_match",
            [
                "Could not find time variable to test extent of "
                "time_coverage_start/time_coverage_end, see CF-1.6 "
                "spec chapter 4.4"
            ],
        )

    obs_mins = {
        var._name: Unit(str(var.units)).convert(np.nanmin(var),
                                                "seconds since 1970-01-01")
        for var in t_vars
    }
    obs_maxs = {
        var._name: Unit(str(var.units)).convert(np.nanmax(var),
                                                "seconds since 1970-01-01")
        for var in t_vars
    }

    min_pass = any(np.isclose(t_min, min_val) for min_val in obs_mins.values())
    max_pass = any(np.isclose(t_max, max_val) for max_val in obs_maxs.values())

    allpass = sum((min_pass, max_pass))

    msgs = []
    if not min_pass:
        msgs.append(
            "Data for possible time variables (%s) did not match "
            "time_coverage_start value (%s)" % (obs_mins, t_min)
        )
    if not max_pass:
        msgs.append(
            "Data for possible time variables (%s) did not match "
            "time_coverage_end value (%s)" % (obs_maxs, t_max)
        )

    return Result(BaseCheck.MEDIUM, (allpass, 2),
                  "time_coverage_extents_match", msgs)
def set_details(self, v):
    # try to get as much from the page itself
    obj = self.parser.select(self.document.getroot(), 'h1[itemprop=name]')
    if len(obj) > 0:
        v.title = unicode(obj[0].text)

    obj = self.parser.select(self.document.getroot(), 'meta[itemprop=dateCreated]')
    if len(obj) > 0:
        v.date = parse_dt(obj[0].attrib['content'])

    #obj = self.parser.select(self.document.getroot(), 'meta[itemprop=duration]')

    obj = self.parser.select(self.document.getroot(), 'meta[itemprop=thumbnailUrl]')
    if len(obj) > 0:
        v.thumbnail = Thumbnail(unicode(obj[0].attrib['content']))

    # for the rest, use the JSON config descriptor
    json_data = self.browser.openurl('http://%s/config/%s?type=%s&referrer=%s'
                                     % ("player.vimeo.com", int(v.id),
                                        "html5_desktop_local", ""))
    data = json.load(json_data)
    if data is None:
        raise BrokenPageError('Unable to get JSON config for id: %r' % int(v.id))

    if v.title is None:
        v.title = unicode(data['video']['title'])
    if v.thumbnail is None:
        v.thumbnail = Thumbnail(unicode(data['video']['thumbnail']))
    v.duration = datetime.timedelta(seconds=int(data['video']['duration']))

    # determine available codec and quality; use highest quality possible
    quality = 'sd'
    codec = None
    if 'vp6' in data['video']['files']:
        codec = 'vp6'
    if 'vp8' in data['video']['files']:
        codec = 'vp8'
    if 'h264' in data['video']['files']:
        codec = 'h264'
    if not codec:
        raise BrokenPageError('Unable to detect available codec for id: %r'
                              % int(v.id))
    if 'hd' in data['video']['files'][codec]:
        quality = 'hd'

    v.url = unicode("http://player.vimeo.com/play_redirect?quality=%s&codecs=%s"
                    "&clip_id=%d&time=%s&sig=%s&type=html5_desktop_local"
                    % (quality, codec, int(v.id),
                       data['request']['timestamp'],
                       data['request']['signature']))

    # attempt to determine the redirected URL to pass it instead, since the
    # target server doesn't check for User-Agent, unlike the source one.
    # HACK: we use mechanize directly here for now... FIXME
    self.browser.set_handle_redirect(False)
    try:
        self.browser.open_novisit(v.url)
    except HTTPError, e:
        if e.getcode() == 302 and hasattr(e, 'hdrs'):
            v.url = unicode(e.hdrs['Location'])
def date_filter(dt, dayfirst=False):
    return (formats.date_format(
        timezone.localtime(
            ensure_aware(
                parse_dt(dt, dayfirst=dayfirst) if isinstance(dt, str) else dt)),
        "SHORT_DATE_FORMAT",
    ) if dt else "")
def validate_datetime_object(cls, dt):
    if isinstance(dt, str):
        dt = parse_dt(dt)
    assert dt.tzname() == 'UTC', 'tzname==`{}`. Expected `UTC`'.format(dt.tzname())
    assert not dt.hour % 6, 'hour==`{}` not a multiple of `6`'.format(dt.hour)
    for attr in 'minute', 'second', 'microsecond':
        value = getattr(dt, attr)
        assert value == 0, 'datetime attribute `{}`==`{}`. Expected `0`'.format(
            attr, value)
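# The invariants enforced above: the timezone must be UTC and the time must
# sit exactly on the 00/06/12/18 UTC grid. A standalone check with dateutil:
from dateutil.parser import parse as parse_dt
ok = parse_dt('2024-01-02T06:00:00Z')
assert ok.tzname() == 'UTC' and ok.hour % 6 == 0
assert (ok.minute, ok.second, ok.microsecond) == (0, 0, 0)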
def ttl(self, key_bytes: str) -> Optional[int]:
    now = datetime.utcnow()
    with closing(self.conn.cursor()) as cursor:
        cursor.execute(GET_TTL_DQL, (key_bytes,))
        (expiry,) = cursor.fetchone()
        expiry_dt = parse_dt(expiry)
        ttl_td = expiry_dt - now
        return int(ttl_td.total_seconds())
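# GET_TTL_DQL is not shown in the snippet; a self-contained sketch with a
# guessed schema and SQL demonstrates the round trip the method assumes:
import sqlite3
from datetime import datetime, timedelta
from dateutil.parser import parse as parse_dt

GET_TTL_DQL = 'SELECT expiry FROM cache WHERE key = ?'  # assumed query
conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE cache (key TEXT PRIMARY KEY, expiry TEXT)')
conn.execute('INSERT INTO cache VALUES (?, ?)',
             ('k', (datetime.utcnow() + timedelta(seconds=60)).isoformat()))
(expiry,) = conn.execute(GET_TTL_DQL, ('k',)).fetchone()
assert 0 < int((parse_dt(expiry) - datetime.utcnow()).total_seconds()) <= 60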
def get_current(self, city_id):
    dom = self._get_weather_dom(city_id)
    current = dom.getElementsByTagName("yweather:condition")[0]
    return Current(
        parse_dt(current.getAttribute("date")),
        float(current.getAttribute("temp")),
        unicode(current.getAttribute("text")),
        u"C",
    )
def iter_forecast(self, city_id):
    dom = self._get_weather_dom(city_id)
    for forecast in dom.getElementsByTagName('yweather:forecast'):
        yield Forecast(parse_dt(forecast.getAttribute('date')),
                       float(forecast.getAttribute('low')),
                       float(forecast.getAttribute('high')),
                       unicode(forecast.getAttribute('text')),
                       u'C',
                       )
def _items_exist(type, ids_list, updated_after_str):
    if not isinstance(ids_list, list):
        return abort(400)
    ids = map(unicode, ids_list)
    res = dict((id, False) for id in ids)
    where = app.table.c.id.in_(ids)
    if updated_after_str:
        where &= (app.table.fully_updated_on > parse_dt(updated_after_str))
    res.update(r for r in app.db.execute(select([app.table.c.id, True], where)))
    return res
def query():
    filters = []

    active_str = request.args.get('$active', '')
    if active_str:
        if active_str.lower() in ('1', 't', 'true'):
            filters.append(app.table.c.expires_on >= datetime.datetime.utcnow())
        else:
            filters.append(app.table.c.expires_on < datetime.datetime.utcnow())

    for t in ('before', 'after'):
        arg_str = request.args.get('$seen_' + t)
        if arg_str:
            dt = parse_dt(arg_str)
            if t == 'before':
                filters.append(app.table.c.first_seen_on < dt)
            else:
                filters.append(app.table.c.last_seen_on > dt)

    for key in ('id', 'type'):
        in_key = key + '[]'
        if in_key in request.args:
            vals = request.args.getlist(in_key)
            filters.append(getattr(app.table.c, key).in_(vals))

    for in_key, vals in request.args.iterlists():
        if not in_key.endswith('[]') or not vals:
            continue
        fieldname = in_key[:-2]
        if fieldname in ('id', 'type'):
            continue
        ored_filters = []
        for val in vals:
            ored_filters.append(
                app.table.c.attributes.op('->>')(fieldname) == unicode(val))
        if ored_filters:
            filters.append(reduce(or_, ored_filters))

    items = []
    if filters:
        where = reduce(and_, filters)
    else:
        where = None
    for row in app.db.execute(select([app.table], where)):
        item = {}
        items.append(item)
        for key in ('id', 'type', 'attributes'):
            item[key] = row[key]
    return jsonxfy(status='ok', items=items)
def parse_offer(self, html):
    try:
        offer_cash_ratio = float(
            self.pat_offer_cash_ratio.search(html).group(1)) / 100
    except Exception:
        offer_cash_ratio = 1.
    offers_at = self.pat_offers_at.search(self.pat_tags.sub('', html)).group(1)
    offers_at = self.pat_ymd.sub('/', offers_at)
    offers_at = parse_dt(offers_at)

    t = lxml.html.fromstring(html)
    cells = [x.text_content().strip() for x in t.xpath(
        './/div[@class="detail_cn"]/table/tbody/tr/td')]
    per_row = len(cells) // 2
    d = dict(zip(cells[:per_row], cells[per_row:]))
    if not d:
        # the details are rendered as an image instead of a table
        raise NotImplementedError
    name = d['藏品名称']    # collectible name
    symbol = d['藏品代码']  # collectible code
    offer_price = float(d['申购价格'].replace('元', ''))  # offer price (yuan)
    offer_quantity = int(d['申购总数']                    # offer quantity
                         .replace('万', '0000')           # 万 = 10,000
                         .replace('.', ''))
    if symbol.startswith('1'):
        type_ = '邮票'  # stamps
    elif symbol.startswith('2'):
        type_ = '钱币'  # coins
    else:
        raise ValueError
    stamp = {
        'exchange': self.abbr,
        'type_': type_,
        'symbol': symbol,
        'name': name,
        'offer_price': offer_price,
        'offer_quantity': offer_quantity,
        'offer_accmax': offer_quantity,
        'offer_overbuy': True,
        'offer_fee': self.offer_fee,
        'offer_cash_ratio': offer_cash_ratio,
        'offers_at': offers_at,
        'draws_at': offers_at + timedelta(days=1),
        'trades_at': offers_at + timedelta(days=2),
    }
    return [stamp]
def dict2row(table_, row):
    """Coerce `row`'s value types as necessary"""
    result = {}
    columns = {c.name: c for c in table_.columns}
    for k, v in row.items():
        if k not in columns:
            raise SqlRestException(
                "Column '{}' doesn't exist in table '{}'".format(k, table_.name)
            )

        # convert date types
        type = columns[k].type
        if isinstance(type, s.types.Date):
            v = parse_dt(v).date()
        elif isinstance(type, s.types.DateTime):
            v = parse_dt(v)

        # no need to convert boolean, numeric, or string types. The only
        # remaining type is interval, but I don't have a good parser for it...
        result[k] = v
    return result
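# A hypothetical table exercising the coercion; only Date/DateTime columns
# are converted, everything else passes through untouched:
import sqlalchemy as s
events = s.Table('events', s.MetaData(),
                 s.Column('day', s.Date),
                 s.Column('stamp', s.DateTime),
                 s.Column('name', s.String))
row = dict2row(events, {'day': '2024-01-02',
                        'stamp': '2024-01-02T03:04:05',
                        'name': 'x'})
# row['day'] -> datetime.date(2024, 1, 2)
# row['stamp'] -> datetime.datetime(2024, 1, 2, 3, 4, 5)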
def __init__(self, timestamp, point):
    self.time = parse_dt(timestamp)
    self.temperature = point.get('temperature', None)
    self.wind_speed = point.get('wind_speed', None)
    self.wind_gust = point.get('wind_gust', None)
    self.wind_direction = point.get('wind_direction', None)
    self.humidity = point.get('humidity', None)
    self.cloud_coverage = point.get('cloud_coverage', None)
    self.pressure = point.get('pressure', None)
    self.dew_point = point.get('dew_point', None)
    self.precipitation = point.get('precipitation', None)
    self.precipitation_1h = point.get('precipitation_1h', None)
    self.weather_symbol = int(point.get('weather_symbol', 0))
def _fetch_json(self, data_type, areas, end_date=None):
    ''' Fetch JSON from API '''
    # If end_date isn't set, default to yesterday
    if end_date is None:
        end_date = date.today() - timedelta(days=1)
    # If end_date isn't a date or datetime object, try to parse a string
    if not isinstance(end_date, date) and not isinstance(end_date, datetime):
        end_date = parse_dt(end_date)
    # Create request to API
    r = requests.get(self.API_URL % data_type, params={
        'currency': self.currency,
        'endDate': end_date.strftime('%d-%m-%Y'),
        'entityName': ''.join(areas),
    })
    # Return JSON response
    return r.json()
def command_seen(bot, user, channel, args):
    """Displays the last action by the given user"""
    if not args:
        return bot.say(channel, "Please provide a nick to search...")

    table = get_table(bot, channel)
    # Return the first match, there shouldn't be multiples anyway
    user = table.find_one(nick=args)
    if not user:
        return bot.say(channel, "I haven't seen %s on %s" % (args, channel))

    if not isinstance(user["action_time"], datetime):
        user["action_time"] = parse_dt(user["action_time"])

    # Calculate last seen in seconds
    last_seen = datetime.now() - user["action_time"]
    # Get string for last seen
    last_seen = __get_length_str(last_seen.days * 86400 + last_seen.seconds)

    # If the last action was part or quit, show also the message
    if user["last_action"] in ["left", "quit"]:
        return bot.say(
            channel,
            "%s was last seen at %s (%s ago) [%s, %s]" % (
                user["nick"],
                "{0:%Y-%m-%d %H:%M:%S}".format(user["action_time"]),
                last_seen,
                user["last_action"],
                user["last_message"],
            ),
        )

    # Otherwise just show the time and action
    return bot.say(
        channel,
        "%s was last seen at %s (%s ago) [%s]" % (
            user["nick"],
            "{0:%Y-%m-%d %H:%M:%S}".format(user["action_time"]),
            last_seen,
            user["last_action"],
        ),
    )
def get_video_from_json(self, data):
    # session_id is unique per talk
    # vault_media_id is unique per page
    # (but can refer to 2 video files for dual screen)
    # solr_id is "${vault_media_id}.${conference_id}.${session_id}.${vault_media_type_id}"
    # XXX: do we filter them or let people know about them?
    #if 'anchor' in data:
    #    if data['anchor']['href'] == '#':
    #        # file will not be accessible (not free and not logged in)
    #        return None
    if 'vault_media_id' not in data:
        return None
    media_id = int(data['vault_media_id'])
    video = GDCVaultVideo(media_id)

    # 1013679 has \n in title...
    video.title = unicode(data.get('session_name', '').replace('\n', ''))

    # TODO: strip out <p>, <br> and other html...
    # XXX: 1013422 has all 3 and !=
    if 'overview' in data:
        video.description = unicode(data['overview'])
    elif 'spell' in data:
        video.description = unicode(data['spell'])
    else:
        video.description = unicode(data.get('description', ''))

    if 'image' in data:
        video.thumbnail = BaseImage(data['image'])
        video.thumbnail.url = video.thumbnail.id

    if 'speakers_name' in data:
        video.author = unicode(", ".join(data['speakers_name']))

    if 'start_date' in data:
        video.date = parse_dt(data['start_date'])

    if 'score' in data:
        video.rating = data['score']

    video.set_empty_fields(NotAvailable)
    return video
def daterange_start(value):
    '''Parse a date range start boundary'''
    if not value:
        return None
    elif isinstance(value, datetime):
        return value.date()
    elif isinstance(value, date):
        return value

    result = parse_dt(value).date()
    dashes = value.count('-')

    if dashes >= 2:
        return result
    elif dashes == 1:
        # Year/Month only
        return result.replace(day=1)
    else:
        # Year only
        return result.replace(day=1, month=1)
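# Start boundaries widen partial dates toward the beginning of the period.
# (Note: dateutil fills the missing day of a bare 'YYYY-MM' literal from
# today's date before the replace, so short months can raise near month-end;
# the checks below avoid that.)
assert daterange_start('2024') == date(2024, 1, 1)
assert daterange_start('2024-01') == date(2024, 1, 1)
assert daterange_start('2024-02-15') == date(2024, 2, 15)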
def do_final(self, args):
    """Take a machine off the floor"""
    db = self.db
    machs = db.all_machines
    finals = db.finals
    while not args.isnumeric():
        args = input("Machine #? ")
    args = int(args)
    machine = db((machs.slot_num == args) & (machs.on_floor == True))
    if machine.isempty():
        print("Machine not found.")
        return
    machine = get_one(machine)

    # display basic machine info
    print('\n\x1b[32;1m' + machine.description + '\x1b[0m')
    print('-' * len(machine.description))
    rows = []
    rows.append(['slot_num', 'smid', 'seal_num'])
    rows.append(['loc_row', 'oid_dpu', None, 'oid_box'])
    display_record(machine, rows)
    print('')

    final_date = ''
    while not final_date:
        try:
            final_date = parse_dt(input("Final date? "))
        except ValueError:
            print("Unable to parse date, try yyyy/mm/dd format")
            final_date = ''
    machine.update_record(on_floor=False, loc_casino=0, loc_row='',
                          oid_dpu=0, oid_box=0)
    finals.insert(final_date=final_date, machine=machine.id)
    db.commit()
    print("Machine taken off floor.")
def daterange_end(value):
    '''Parse a date range end boundary'''
    if not value:
        return None
    elif isinstance(value, datetime):
        return value.date()
    elif isinstance(value, date):
        return value

    result = parse_dt(value).date()
    dashes = value.count('-')

    if dashes >= 2:
        # Full date
        return result
    elif dashes == 1:
        # Year/Month
        return result + relativedelta(months=+1, days=-1, day=1)
    else:
        # Year only
        return result.replace(month=12, day=31)
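# End boundaries widen toward the end of the period; the relativedelta trick
# (snap to day 1, add a month, step back a day) lands on the last day of the
# month regardless of its length:
assert daterange_end('2024') == date(2024, 12, 31)
assert daterange_end('2024-01') == date(2024, 1, 31)
assert daterange_end('2024-02-15') == date(2024, 2, 15)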
def __init__(self, event):
    self.__dict__.update(event)
    self.gravatar_id = ''
    self.login = self.username = ANON
    self.fullname = ''
    self.lang = ''
    self.query_delay = QUERY_DELAY
    self.url_path = urlparse(self.url).path.strip('/')
    self.url_parts = self.url_path.strip('/').split('/')
    self.utc = parse_dt(self.created_at).astimezone(utc).isoformat()
    self.append_fullname = True
    self.prepend_username = True
    self.rendered = ''
    if hasattr(self, 'actor_attributes'):
        self.gravatar_id = self.actor_attributes['gravatar_id']
        self.login = self.actor_attributes['login']
        self.username = self.actor_attributes.get('name', '')
        # generally avoid empty usernames
        if not self.username.strip():
            self.username = self.actor_attributes['login']
    if hasattr(self, 'repository'):
        self.fullname = urlparse(self.repository['url']).path.strip('/')
        self.lang = self.repository.get('language', '')
    # avoid naming conflict with cube down the road
    try:
        self.typ = self.type
        del self.type
    except AttributeError:
        pass  # already done
    self.render()
def import_maint(db, f):
    """
    Import maintenance log from a .csv file into the database.

    Args:
        db - DAL object
        f - Path object of the .csv file
    """
    with f.open() as csvfile:
        csvreader = csv.DictReader(csvfile)
        updated_rows = 0
        machs = db.all_machines
        cabs = db.cabinets
        pms = db.pm_activity
        techs = db.tech_names
        for r in csvreader:
            dt = parse_dt(r['Datetime'])
            m = int(r['Machine'])
            code = int(r['Code'])
            tech = db(techs.full_name == r['User Name']).select().first()
            cab = db(machs.slot_num == m).select(machs.cabinet).first()
            q = pms.code_date == dt
            q &= pms.machine == m
            q &= pms.pm_code == code
            new_row = db(q).isempty()
            # if the record already exists or the tech isn't found, skip it
            if new_row and tech and cab:
                pms.insert(code_date=dt, machine=cab.cabinet,
                           tech_name=tech.id, pm_code=code)
                db.commit()
                updated_rows += 1
    return updated_rows
# File "C:\Users\Евгений\Anaconda3\lib\site-packages\suds\__init__.py", line 28, in <module> # from version import __build__, __version__ # ImportError: No module named 'version' # # -*- coding: utf-8 -*- from __future__ import unicode_literals from __future__ import print_function from datetime import datetime, timedelta from dateutil.parser import parse as parse_dt from suds.client import Client from suds.xsd.doctor import Import, ImportDoctor cbr_namespace = "http://web.cbr.ru/" url = "http://www.cbr.ru/DailyInfoWebServ/DailyInfo.asmx?wsdl" imp = Import('http://www.w3.org/2001/XMLSchema', location='http://www.w3.org/2001/XMLSchema.xsd') imp.filter.add(cbr_namespace) client = Client(url, doctor=ImportDoctor(imp)) start = datetime.now() - timedelta(days=30) end = datetime.now() result = client.service.Ruonia(start, end) for x in result.diffgram.Ruonia.ro: print("Дата %s" % parse_dt(x.D0).strftime('%Y.%m.%d')) print("Ставка, %s %%" % x.ruo) print("Объем сделок %s млрд. руб\n" % x.vol)