def insert_tweets(tweets, query):
    # TODO clean up the logic re the max_id -- the current tweet_id is not
    # necessarily the max id for all hashtags being monitored! We need to keep
    # separate max_id for each tag...
    max_id = -1
    results = []
    feedback = ''
    for tweet in tweets:
        tweet['_id'] = tweet['id']
        max_id = int(tweet['_id'])
        tweet.pop('id', None)  # delete the old id key
        tweet['hashtags'] = make_hash_field(tweet['entities']['hashtags'])
        tweet['created_at'] = parse(tweet['created_at']).datetime
        tweet['user']['created_at'] = parse(tweet['user']['created_at']).datetime
        try:
            result = TWEETS.update({'_id': tweet['_id']}, tweet, upsert=True)
            # print '{} inserted with tags {}'.format(result, tweet['hashtags'])
            results.append(result)
        except Exception:
            logging.info('ERROR in updating document id: {}.'.format(tweet['_id']))
    feedback = ['+' for result in results if not result['updatedExisting']]
    feedback = '{:3} / {:3} {}'.format(len(feedback), len(tweets), query)
    print feedback
    return max_id
def test_timedelta_arithmetic(self):
    hour = timedelta(hours=1)
    d = delorean.parse("2014/06/02 10:00:00 -0700")
    hourbefore = delorean.parse("2014/06/02 09:00:00 -0700")
    hourafter = delorean.parse("2014/06/02 11:00:00 -0700")
    self.assertEqual(d + hour, hourafter)
    self.assertEqual(d - hour, hourbefore)
def run(self, session, args):
    num_lines = min(MAX_NUM_LINES, args.num_lines)
    from_ = None
    to_ = utcnow()
    if args.from_:
        from_ = parse(args.from_)
    if args.to_:
        to_ = parse(args.to_)
    result = []
    for history_item in self.get_history(session, num_lines, to_, from_=from_):
        result.append(self.format_message(history_item))
    return result
def check_twitter(query):
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.setKeywords([query])  # let's define all words we would like to have a look for
        tso.setLanguage('en')  # we want to see English tweets only
        # tso.setCount(1000000)  # please dear Mr Twitter, only give us 7 results per page
        tso.setIncludeEntities(False)  # and don't give us all those entity information

        # it's about time to create a TwitterSearch object with our secret tokens
        # (credentials redacted -- substitute your own application keys)
        ts = TwitterSearch(
            consumer_key='YOUR_CONSUMER_KEY',
            consumer_secret='YOUR_CONSUMER_SECRET',
            access_token='YOUR_ACCESS_TOKEN',
            access_token_secret='YOUR_ACCESS_TOKEN_SECRET'
        )

        now_time = delorean.parse(str(datetime.utcnow()))
        now_time_minute = now_time._dt.time().minute
        now_time_hour = now_time._dt.time().hour
        cont = True
        i = 0
        average_retweets = 0
        biggest_no_of_retweets = 0
        for tweet in ts.searchTweetsIterable(tso):  # this is where the fun actually starts :)
            tweet_time_minute = delorean.parse(ts.getMetadata()['date'])._dt.time().minute
            tweet_time_hour = delorean.parse(ts.getMetadata()['date'])._dt.time().hour
            if (tweet_time_hour + 1 == now_time_hour) and ((now_time_minute - tweet_time_minute) <= 2):
                average_retweets += tweet['retweet_count']
                if tweet['retweet_count'] >= biggest_no_of_retweets:
                    biggest_no_of_retweets = tweet['retweet_count']
                # print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )
                i += 1
                # print '.'
            else:
                break
        try:
            average_retweets = average_retweets / i
            return ('The biggest number of retweets was ' + str(biggest_no_of_retweets) +
                    ' and the average number of retweets was ' + str(average_retweets))
        except ZeroDivisionError:
            return 'No tweets for specified keyword'
    except TwitterSearchException as e:  # take care of all those ugly errors if there are some
        print(e)
def handle(self, *args, **options):
    DATE = delorean.parse("2016-12-31 12:00:00", timezone="US/Mountain").datetime
    self.stdout.write('Update Command args: {}'.format(options))  # DEBUG
    csvfile = open(options['csvfile'][0], 'rb')
    update_parts = list(csv.DictReader(csvfile))
    # IPython.embed()
    no_fail = True
    for p in update_parts:
        # Lowercase dict keys
        p = dict((k.lower(), v) for k, v in p.iteritems())
        part = Part.objects.get(id=int(p['id']))
        if p['part_number'] != part.part_number:
            no_fail = False
            print "'{}' does not match '{}' for id {}".format(p['part_number'], part.part_number, part.id)
    assert no_fail
    for p in update_parts:
        # Lowercase dict keys
        p = dict((k.lower(), v) for k, v in p.iteritems())
        part = Part.objects.get(id=int(p['id']))
        qty = int(p['qty'])
        part.qty = qty
        part.location = p['location']
        part.full_clean()
        print "Updating part: {}".format(part.part_number)
        part._meta.fields[-1].auto_now = False  # Force lastupdate
        part.lastupdate = DATE
        part.save(note=p['notes'])
def parse_results_file(self, src, data_directory):
    """
    """
    saver = Saver()
    process = LacProcessMethods()
    latest_directory = "%s%s_latest" % (data_directory, src.source_short)
    election = Election.objects.filter(electionid=src.election.electionid).first()
    for file in src.source_files.split(", "):
        latest_path = os.path.join(latest_directory, file)
        file_exists = os.path.isfile(latest_path)
        file_has_size = os.path.getsize(latest_path)
        if file_exists and file_has_size > 0:
            rows = process.open_results_file(latest_path)
            race_ids = process.get_race_ids_from(rows)
            election_package = process.collate_and_fetch_records_for_race(race_ids, rows)
            races = election_package[0]
            election_title = election_package[1]["title"]
            election_stats = election_package[1]["stats"]
            file_timestring = None
            file_timestamp = None
            pacific = pytz.timezone("US/Pacific")
            for t in election_title:
                if t[3:5] == "TD":
                    parser = TD_parser()
                    parsed = parser.parse_line(t)
                    timestring = parsed["date"] + " " + parsed["time"]
                    file_timestring = timestring
            if file_timestring:
                file_timestamp = parse(file_timestring, dayfirst=False).datetime
                # use localize() instead of replace(tzinfo=...): a pytz zone
                # attached via replace() gets a wrong (LMT) offset
                file_timestamp = pacific.localize(file_timestamp.replace(tzinfo=None))
                if self.testing:
                    update_this = self.testing
                else:
                    update_this = saver._eval_timestamps(file_timestamp, src.source_latest)
                if update_this == False:
                    logger.info("\n*****\nwe have newer data in the database so let's delete these files\n*****")
                    os.remove(latest_path)
                else:
                    logger.info("\n*****\nwe have new data to save and we'll update timestamps in the database\n*****")
                    saver._update_result_timestamps(src, file_timestamp)
                    title = process.dictify_records_and_return(election_title)
                    stats = process.dictify_records_and_return(election_stats)
                    election_info = process.compile_election_stats(title, stats)
                    for r in races:
                        records = process.dictify_records_and_return(races[r])
                        """
                        checks to see if this is a recall contest or a nonpartisan contest
                        for now, it's unclear how to store or display these contests
                        in future, however, we may want to parse and return their results
                        """
                        skip = process.check_if_recall_or_nonpartisan(records)
                        if not skip:
                            contest_package = process.compile_contest_results(records)
                            process.update_database(contest_package, election, src)
                    logger.info("we've finished processing lac results")
            else:
                logger.error("unable to determine whether this data is newer than what we already have.")
def test_parse_with_fixed_offset_timezone(self):
    tz = pytz.FixedOffset(-480)
    dt = tz.localize(datetime(2015, 1, 1))
    dt_str = dt.strftime('%Y-%m-%d %H:%M:%S %z')
    do = delorean.parse(dt_str)
    self.assertEqual(do.datetime, dt)
    self.assertEqual(do.timezone, tz)
def test_parse_with_tzutc_timezone(self, mock_get_localzone):
    mock_get_localzone.return_value = pytz.utc
    dt = pytz.utc.localize(datetime(2015, 1, 1))
    dt_str = dt.strftime('%Y-%m-%d %H:%M:%S %Z')
    do = delorean.parse(dt_str)
    self.assertEqual(do.datetime, dt)
    self.assertEqual(do.timezone, pytz.utc)
def start(self, filename):
    with open(filename) as f:
        for line in f:
            parsed = json.loads(line)
            parsed_date = parse(parsed['created_at'])
            if self._should_post(parsed_date):
                self.publish(line)
def test_parse_with_overriding_timezone_parameter(self):
    tz = pytz.timezone('US/Pacific')
    dt = tz.localize(datetime(2015, 1, 1))
    tz = dt.tzinfo
    dt_str = dt.strftime('%Y-%m-%d %H:%M:%S -0500')
    do = delorean.parse(dt_str, timezone='US/Pacific')
    self.assertEqual(do.datetime, dt)
    self.assertEqual(do.timezone, tz)
def parse_query_value(query_str):
    """ Return value for the query string """
    try:
        query_str = str(query_str).strip('"\' ')
        if query_str == 'now':
            d = utcnow()
        # Parse datetime string or timestamp
        elif query_str.isdigit() and len(query_str) == 13:
            # 13-digit millisecond timestamp: truncate to epoch seconds
            d = epoch(float(query_str[:10]))
        elif query_str.isdigit() and len(query_str) == 8:
            # 8-digit YYYYMMDD date: assume midnight in +0800
            d = parse(query_str + " 00:00:00 +0800")
        else:
            # Any other datetime string: append the +0800 offset and parse
            # (previously this case fell through with `d` unbound)
            d = parse(str(query_str) + " +0800")
    except (TypeError, ValueError):
        d = None
    return d
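# A quick usage sketch of the branches above (inputs invented for
# illustration; utcnow/epoch/parse are the delorean helpers the snippet
# already relies on):
print(parse_query_value('now'))            # current UTC time via utcnow()
print(parse_query_value('1356998400000'))  # 13-digit ms timestamp -> epoch(1356998400.0)
print(parse_query_value('20130101'))       # 8-digit date, parsed as midnight +0800
print(parse_query_value('2013-01-01'))     # any other string, parsed with +0800 appended
print(parse_query_value(None))             # unparseable -> None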
def gates():
    try:
        service_list = OrderedDict()
        env_list = dict()
        now = Delorean.now()
        for group in blueprint.mongo.get_groups():
            env_list[group] = set()
            service_list[group] = dict()
            for service_name in blueprint.mongo.get_services_in_group(group):
                service = blueprint.mongo.get_gate(group, service_name)
                for env in service["environments"]:
                    env_list[group].add(env)
                    if service["environments"][env]["state_timestamp"]:
                        service["environments"][env]["state_age"] = (
                            now - (now - parse(service["environments"][env]["state_timestamp"]))
                        ).humanize()
                    if service["environments"][env]["message_timestamp"]:
                        service["environments"][env]["message_age"] = (
                            now - (now - parse(service["environments"][env]["message_timestamp"]))
                        ).humanize()
                    service["environments"][env]["api_closed"] = gate_is_closed(service, env)
                    for t in service["environments"][env]["queue"]:
                        t["age"] = (now - (now - parse(t["updated"]))).humanize()
                service_list[group][service_name] = service
            env_list[group] = sorted(env_list[group])
        return view_util.render(
            "gates.html",
            "Gates",
            env_list=env_list,
            gate_list=service_list,
            info_list=util.generate_info(blueprint.config),
        )
    except (ConnectionFailure, OperationFailure) as error:
        return view_util.error_page(error.message)
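# The repeated `now - (now - parse(...))` above looks like a no-op, but it
# round-trips the parsed timestamp through timedelta arithmetic so the result
# is a Delorean expressed in now's timezone before humanize() is applied.
# A minimal sketch of the idiom with an assumed timestamp value (module-level
# delorean.now() is used here; the snippet's `Delorean.now()` presumably
# resolves to the same thing via its imports):
import delorean

now = delorean.now()
stamp = "2015-06-01T12:00:00+05:00"  # assumed example value

delta = now - delorean.parse(stamp)  # Delorean - Delorean -> timedelta
age = now - delta                    # same instant, rebuilt in now's timezone
print(age.humanize())                # e.g. "... ago"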
def test_parse_with_tzlocal_timezone(self, mock_get_local_zone):
    tz = pytz.timezone('US/Eastern')
    mock_get_local_zone.return_value = tz
    dt = datetime(2015, 1, 1, tzinfo=tzlocal())
    dt_str = dt.strftime('%Y-%m-%d %H:%M:%S %Z')
    dt = dt.replace(tzinfo=None)
    dt = tz.localize(dt)
    tz = dt.tzinfo
    do = delorean.parse(dt_str)
    self.assertEqual(do.datetime, dt)
    self.assertEqual(do.timezone, tz)
def to_value(self, data):
    if isinstance(data, date) and not isinstance(data, datetime):
        self.fail('date')
    if isinstance(data, datetime):
        return data
    try:
        value = delorean.parse(data).datetime
    except ValueError:
        self.fail('invalid')
    return value
def test_repr_fixed_offset_timezone(self):
    import datetime
    from delorean import Delorean
    tz = pytz.timezone('US/Pacific')
    dt = tz.localize(datetime.datetime(2015, 1, 1))
    dt_str = dt.strftime('%Y-%m-%d %H:%M:%S %z')
    d1 = delorean.parse(dt_str)
    d2 = eval(repr(d1))
    self.assertEqual(d1, d2)
    self.assertEqual(d1.datetime, d2.datetime)
    self.assertEqual(d1.timezone, d2.timezone)
def parse_query_value(query_str):
    """ Return value for the query string """
    try:
        query_str = str(query_str).strip('"\' ')
        if query_str == 'now':
            d = utcnow()
        else:
            # Parse datetime string or timestamp
            try:
                d = epoch(float(query_str))
            except ValueError:
                d = parse(str(query_str))
    except (TypeError, ValueError):
        d = None
    return d
def extract_rate_limit(self, response):
    """Extract rate limit info from response/headers.

    Get it just from the response, so it is relevant to the type of
    query we are doing."""
    try:
        self.rate_limit_remaining = int(response.headers['x-rate-limit-remaining'])
        self.rate_limit_limit = int(response.headers['x-rate-limit-limit'])
        self.rate_limit_reset = epoch(int(response.headers['x-rate-limit-reset'])).datetime
        self.twitter_date = parse(response.headers['date']).datetime
        logging.debug(
            'Twitter rate limit info:: rate-limit: %s, remaining: %s' %
            (self.rate_limit_limit, self.rate_limit_remaining))
        # logging.debug(
        #     'Twitter rate limit info:: rate-limit: %s, remaining: %s, '
        #     'reset: %s, current-time: %s' % (self.rate_limit_limit,
        #     self.rate_limit_remaining, self.rate_limit_reset, self.twitter_date))
    except KeyError:
        pass
def get_info(month_range, *dois):
    res = dict()
    values = (0,) * 3
    infos = get_event_info(*dois)
    for doc in infos:
        doi = doc['doi']
        title = doc['title'].replace(',', '')
        res[doi] = {'title': title, 'months': {}}
        month = delorean.parse(doc['publication_date']).datetime.month
        monthdata = doc['sources'][0]['events']
        if monthdata:
            for metric in monthdata:
                res[doi]['months'][int(metric['month']) - month + 1] = \
                    metric['html_views'], metric['pdf_views'], metric['xml_views']
    for doc in res.iterkeys():
        months = [res[doc]['months'][x] for x in month_range]
        for i, month in enumerate(res[doc]['months'].itervalues()):
            yield (doc, res[doc]['title'], i, month[0], month[1], month[2])
def parse_query_value(query_str, tf):
    """ Return value for the query string """
    try:
        query_str = str(query_str).strip('"\' ')
        if query_str == 'now':
            d = Delorean(timezone=tf)
        else:
            # Parse datetime string or timestamp
            try:
                d = epoch(float(query_str))
                d.shift(tf)
            except ValueError:
                d = parse(str(query_str))
                d.shift(tf)
    except (TypeError, ValueError):
        d = None
    return d
def index():
    q = domain_date_histogram(top_domains(top_hashtags(function_score(
        base_query()))))
    ctx = {'query': q, 'tags': None}
    tag = request.args.get('tag', None)
    if tag:
        # add filter for selected hashtag
        ctx['selected_tag'] = tag
        f = q['query']['function_score']['query']['filtered']['filter']
        f['bool']['must'].append({'term': {'entities.hashtags.text': tag}})
    domain = request.args.get('domain', None)
    if domain:
        # add filter for selected domain
        ctx['selected_domain'] = domain
        f = q['query']['function_score']['query']['filtered']['filter']
        f['bool']['must'].append({'term': {'entities.urls.domain': domain}})
    result = es.search(app.config['ES_INDEX'], body=q)
    ctx['result'] = result
    tweets = [hit['_source'] for hit in result['hits']['hits']]
    ctx['tweets'] = [humanize_tweet_published(tweet) for tweet in tweets]
    date_hist_buckets = []
    for bucket in result['aggregations']['date_hist']['buckets']:
        t = delorean.parse(bucket['key_as_string'])
        d = {
            'key': t.shift('Europe/Berlin').datetime.strftime('%Y-%m-%d %H:00'),
        }
        for subbucket in bucket['domains']['buckets']:
            k = subbucket['key'].replace('.', '')
            d[k] = subbucket['doc_count']
        date_hist_buckets.append(d)
    ctx['date_hist_buckets'] = date_hist_buckets
    return render_template('index.html', **ctx)
def visit_timestamp(self, schema, pointer):
    path, actual_val = pointer.path(), pointer.value()

    is_nullable = 'nullable' in schema._params
    if is_nullable and actual_val is None:
        return []

    is_type_valid = self.__is_type_valid(actual_val, schema._valuable_types, is_nullable)
    if not is_type_valid:
        expected_types = ['timestamp', 'null'] if is_nullable else 'timestamp'
        return [ValidationTypeError(path, actual_val, expected_types)]

    try:
        import delorean
        timestamp = delorean.parse(actual_val)
    except ValueError:
        return [ValidationTimestampError(path, actual_val)]

    if 'value' in schema._params:
        expected_val = schema._params['value']
        is_value_valid = self.__is_value_valid(timestamp, expected_val, is_nullable)
        if not is_value_valid:
            return [ValidationValueError(path, actual_val, expected_val.datetime.isoformat())]

    errors = []
    if 'min_value' in schema._params:
        min_value = schema._params['min_value']
        if timestamp < min_value:
            errors += [ValidationMinValueError(path, actual_val, min_value.datetime.isoformat())]

    if 'max_value' in schema._params:
        max_value = schema._params['max_value']
        if timestamp > max_value:
            errors += [ValidationMaxValueError(path, actual_val, max_value.datetime.isoformat())]

    if 'iso' in schema._params:
        is_pattern_match = self.__is_pattern_match(actual_val, self.iso8601)
        if not is_pattern_match:
            errors += [ValidationTimestampFormatError(path, actual_val, 'ISO 8601')]
    elif 'format' in schema._params:
        expected_format = schema._params['format']
        if actual_val != timestamp.datetime.strftime(expected_format):
            errors += [ValidationTimestampFormatError(path, actual_val, expected_format)]

    return errors
def extract_rate_limit(self, response):
    """Extract rate limit info from response/headers.

    The rate limit Twitter API request response provides bad data in
    the headers, so check the payload first and fall back to headers
    for other request types."""
    try:
        data = response.data['resources']['search']['/search/tweets']
        self.rate_limit_remaining = data['remaining']
        self.rate_limit_limit = data['limit']
        self.rate_limit_reset = epoch(data['reset']).datetime
    except (KeyError, TypeError):
        self.rate_limit_remaining = int(response.headers['x-rate-limit-remaining'])
        self.rate_limit_limit = int(response.headers['x-rate-limit-limit'])
        self.rate_limit_reset = epoch(int(response.headers['x-rate-limit-reset'])).datetime
    self.twitter_date = parse(response.headers['date']).datetime
    logging.debug(
        'Twitter rate limit info:: rate-limit: %s, remaining: %s, '
        'reset: %s, current-time: %s' % (self.rate_limit_limit,
        self.rate_limit_remaining, self.rate_limit_reset, self.twitter_date))
def process_item(self, item, spider):
    if 'auctions' not in getattr(spider, 'pipelines', []):
        return item

    item['id'] = int(item['id'][0])
    item['auctioneer'] = ' '.join(item['auctioneer'])
    item['contact_number'] = ' '.join(item['contact_number'])
    item['date'] = '%s %s' % (' '.join(item['date']), ' '.join(item['time']))
    item['location'] = ' '.join(item['location'])
    item['link'] = ' '.join(item['link'])
    item['listing'] = ' '.join(item['listing'])

    # format phone number -- the second argument to format_number() must be a
    # PhoneNumberFormat constant (NATIONAL assumed here), not a PhoneNumber instance
    parsed_number = phonenumbers.parse(item['contact_number'], 'US')
    item['contact_number'] = phonenumbers.format_number(
        parsed_number, phonenumbers.PhoneNumberFormat.NATIONAL)

    # format listing / remove any html cludge
    soup_listing = BeautifulSoup(item['listing'])
    item['listing'] = soup_listing.get_text()

    # format date and time to standard format
    dt = parse(item['date'])
    item['date'] = dt.datetime.strftime('%Y-%m-%d %H:%M:%S')

    if item['id'] in self.ids:
        raise DropItem('Dupe auction stored, ignoring listing: %s' % item)
    else:
        self.dt.insert({
            'id': item['id'],
            'auctioneer': item['auctioneer'],
            'contact_number': item['contact_number'],
            'date': item['date'],
            'location': item['location'],
            'link': item['link'],
            'listing': item['listing'],
        }, 'auctions')
    return item
def test_parse_with_timezone_year_fill(self):
    do = delorean.parse('Thu Sep 25 10:36:28')
    # dateutil fills a missing year with the current year, so don't hardcode it
    dt1 = utc.localize(datetime(date.today().year, 9, 25, 10, 36, 28))
    self.assertEqual(do.datetime, dt1)
    self.assertEqual(do._tz, "UTC")
def test_parse(self):
    do = delorean.parse('Thu Sep 25 10:36:28 BRST 2003')
    dt1 = utc.localize(datetime(2003, 9, 25, 10, 36, 28))
    self.assertEqual(do.datetime, dt1)
def retry_after_secs(e):
    sleep_time = (epoch(int(e.headers['x-rate-limit-reset'])).datetime -
                  parse(e.headers['date']).datetime).seconds + 1
    return sleep_time
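# A hedged sketch of the helper's behaviour with a stand-in error object
# (values invented; the real `e` is a Twitter rate-limit exception carrying
# the HTTP response headers):
from types import SimpleNamespace

e = SimpleNamespace(headers={
    'x-rate-limit-reset': '1420070460',       # epoch seconds when the window resets
    'date': 'Thu, 01 Jan 2015 00:00:00 GMT',  # the server's current time
})
print(retry_after_secs(e))  # -> 61: 60s until reset, plus one second of slack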
def deserialize(self, value):
    """ Takes a UTC datetime string and returns a datetime object """
    return parse(value).datetime
def test_parse_with_timezone_year_fill(self):
    do = delorean.parse('Thu Sep 25 10:36:28')
    dt1 = utc.localize(datetime(date.today().year, 9, 25, 10, 36, 28))
    self.assertEqual(do.datetime, dt1)
    self.assertEqual(do._tz, "UTC")
def localize_datetime(timezone, string):
    gametime = parse(string)
    gametime.shift(timezone)
    return gametime.datetime.strftime("%Y-%m-%d %I:%M %p")
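# A short usage sketch (input string invented): 19:05 UTC shifted into
# US/Eastern, which is EDT (UTC-4) on this date:
print(localize_datetime('US/Eastern', '2015-06-01 19:05:00 UTC'))
# -> 2015-06-01 03:05 PM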
import datetime

import dateutil.parser  # bare `import dateutil` does not expose the parser submodule
import delorean

# existing expiration (days since epoch)
dd = 19997
exp = delorean.epoch(0) + datetime.timedelta(days=dd)
print('existing exp = ', exp)
print('existing exp.datetime = ', exp.datetime)
dts = dateutil.parser.parse(str(exp.datetime))
print('existing dts.date() = ', dts.date())
print('existing delorean.Delorean(dts) = ', delorean.Delorean(dts))
print('')

proposed_exp_str = '2021/6/14'
proposed_exp = delorean.parse(proposed_exp_str)
print('proposed_exp = ', proposed_exp)
if delorean.Delorean(dts) > proposed_exp:
    print('existing is later than proposed')
else:
    print('existing is earlier than proposed')
print('')

proposed_exp_str = '2031/2/14'
proposed_exp = delorean.parse(proposed_exp_str)
print('proposed_exp = ', proposed_exp)
if delorean.Delorean(dts) > proposed_exp:
    print('existing is later than proposed')
else:
    print('existing is earlier than proposed')
import feedparser
import datetime
import delorean
import requests

rss = feedparser.parse(
    'http://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml')

# print(rss.updated)
time_limit = delorean.parse(rss.updated) - datetime.timedelta(hours=6)
entries = [
    entry for entry in rss.entries
    if delorean.parse(entry.published) > time_limit
]

# print(len(entries))
# print(len(rss.entries))

for entry in entries:
    print(entry['title'], entry['link'])
# Imagine that we need to parse information stored in sales logs.
# We'll use a sales log with the following structure:
#
# [<Timestamp in iso format>] - SALE - PRODUCT: <product id> - PRICE: $<price of the sale>
# [2018-05-05T10:58:41.504054] - SALE - PRODUCT: 1345 - PRICE: $09.99

import delorean
from decimal import Decimal

log = '[2018-05-05T11:07:12.267897] - SALE - PRODUCT: 1345 - PRICE: $09.99'
divide_it = log.split(' - ')
timestamp_string, _, product_string, price_string = divide_it
timestamp = delorean.parse(timestamp_string.strip('[]'))

# Parse the product_id into an integer:
product_id = int(product_string.split(':')[-1])

# Parse the price into a Decimal type:
price = Decimal(price_string.split('$')[-1])

# Now, you have all the values in native Python formats:
# print(timestamp, product_id, price)

# we can use a class to bundle them together
class PriceLog(object):
    def __init__(self, timestamp, product_id, price):
        self.timestamp = timestamp
        self.product_id = product_id
        self.price = price

    def __repr__(self):
        return '<PriceLog ({}, {}, {})>'.format(
            self.timestamp, self.product_id, self.price)
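# A brief follow-up using the class with the values parsed above. The output
# format depends on the __repr__ completed here, which is an assumption and
# not part of the original snippet:
price_log = PriceLog(timestamp, product_id, price)
print(price_log)
# e.g. <PriceLog (Delorean(datetime=datetime.datetime(2018, 5, 5, 11, 7, 12, 267897), timezone='UTC'), 1345, 9.99)>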
print("Today's month = {}".format(today.datetime.month)) print("Today's day of month = {}".format(today.datetime.day)) print("Today (trunc. to month) = {}".format(today.truncate('month'))) last_month = today - datetime.timedelta(days=(today.datetime.day + 1)) period_str = last_month.date.strftime('%Y-%m') print("Last month = {}".format(last_month.datetime.month)) print("Last month's year = {}".format(last_month.datetime.year)) print("period_str = {}".format(period_str)) # calendar.monthrange() returns weekday of the first day of the month # and the number of days in the month for the given year & month print("Month range = {}".format( calendar.monthrange(last_month.datetime.year, last_month.datetime.month))) print("--------------------------------") today = delorean.Delorean() test_arithmetic(today) today = delorean.parse("2020/2/1 00:00:00", dayfirst=False) test_arithmetic(today) today = delorean.parse("2020/3/2 00:00:00", dayfirst=False) test_arithmetic(today) today = delorean.parse("2021/1/2 00:00:00", dayfirst=False) test_arithmetic(today)
def main():
    tz = 'US/Eastern'
    l = []  # collect data in list
    driver = webdriver.Firefox(log_path='/Users/sechilds/geckodriver.log')

    # York Library
    driver.get('https://www.library.yorku.ca/find/MyResearch/CheckedOut')
    username = driver.find_element_by_id('loginUsername')
    username.send_keys(york_email)
    password_box = driver.find_element_by_id('loginPassword')
    password_box.send_keys(york_password)
    login_button = driver.find_element_by_name('submit')
    login_button.click()
    sleep(3)
    try:
        media_list = driver.find_element_by_class_name('media-list')
    except NoSuchElementException:
        media_list = False
    if media_list:
        first_york_due = True
        items = media_list.find_elements_by_tag_name('li')
        for item in items:
            media_title = item.find_element_by_class_name('media-heading').text
            media_author = item.find_element_by_class_name('author-info').text
            checkout_details = item.find_element_by_class_name(
                'checkout-details')
            checkout_details_list = checkout_details.find_elements_by_tag_name(
                'dd')
            due_date = checkout_details_list[1].text
            item_date_due = parse(due_date, timezone=tz)
            how_long = item_date_due - Delorean(timezone=tz) + timedelta(
                days=1)
            day_text = ('1 day' if how_long.days == 1
                        else f'{how_long.days} days')
            if first_york_due:
                first_york_due_days = day_text
                first_york_due = False
            l.append(
                f'{how_long.days}D: {media_title}/{media_author} is due in {day_text} on {due_date}'
            )

    driver.get('https://torontopubliclibrary.ca/signin')
    card_no = driver.find_element_by_id('userId')
    card_no.send_keys(library_card)
    password_box = driver.find_element_by_id('password')
    password_box.send_keys(library_pin)
    submit_button = driver.find_element_by_class_name('signin')
    submit_button.click()
    driver.get('https://account.torontopubliclibrary.ca/checkouts')
    sleep(8)
    try:
        item_table = driver.find_element_by_class_name('item-list')
    except NoSuchElementException:
        item_table = False
    first_due_days = ""
    if item_table:
        first_due = True
        html_table = item_table.get_attribute('outerHTML')
        soup = BeautifulSoup(html_table, 'html.parser')
        rows = soup.findAll("tr")
        for row in rows:
            cells = row.find_all('td')
            try:
                item_due = cells[3].text
                item_parts = cells[2].find_all('div')
                item_title = item_parts[0].text
                item_author = item_parts[1].text
                try:
                    item_date_due = parse(item_due, timezone=tz)
                except ValueError:
                    due_in_parens = item_due[item_due.find("(") + 1:item_due.find(")")]
                    item_date_due = parse(due_in_parens, timezone=tz)
                how_long = item_date_due - Delorean(timezone=tz) + timedelta(
                    days=1)
                day_text = ('1 day' if how_long.days == 1
                            else f'{how_long.days} days')
                if first_due:
                    first_due_days = day_text
                    first_due = False
                item_name = cells[2].text
                l.append(
                    f'{how_long.days}D: {item_title} by {item_author} is due in {day_text} on {item_due}'
                )
            except IndexError:
                pass

    # the problem with holds - there are 3 tables:
    #   holds-redux still-on-hold
    #   holds-redux in-transit
    #   holds-redux ready-for-pickup
    # what's the 3rd one???
    driver.get('https://account.torontopubliclibrary.ca/holds')
    sleep(3)

    # books ready for pickup
    try:
        ready_for_pickup = driver.find_element_by_class_name(
            'ready-for-pickup')
    except NoSuchElementException:
        ready_for_pickup = False
    if ready_for_pickup:
        first_hold = True
        item_table = ready_for_pickup.find_element_by_class_name('item-list')
        html_table = item_table.get_attribute('outerHTML')
        soup = BeautifulSoup(html_table, 'html.parser')
        rows = soup.findAll("tr")
        for row in rows:
            cells = row.find_all('td')
            try:
                # item_due = cells[5].find_all('div')
                hold_due = cells[5]
                hold_due_date = list(hold_due.stripped_strings)[1]
                hold_date_due = parse(hold_due_date, timezone=tz)
                item_parts = cells[2].find_all('div')
                # for i, item in enumerate(cells):
                #     print(f'part {i}: {item.text}')
                # for i, item in enumerate(item_due):
                #     print(f'part {i}: {item.text}')
                item_title = item_parts[0].text
                item_author = item_parts[1].text
                how_long = hold_date_due - Delorean(timezone=tz) + timedelta(
                    days=1)
                hold_day_text = ('1 day' if how_long.days == 1
                                 else f'{how_long.days} days')
                if first_hold:
                    first_hold_days = hold_day_text
                    first_hold = False
                l.append(
                    f'Hold on {item_title} by {item_author} is ready for pickup. Pick up by {hold_date_due.format_datetime()} ({hold_day_text})'
                )
            except IndexError:
                pass

    # books in transit
    try:
        in_transit = driver.find_element_by_class_name('in-transit')
    except NoSuchElementException:
        in_transit = False
    if in_transit:
        item_table = in_transit.find_element_by_class_name('item-list')
        html_table = item_table.get_attribute('outerHTML')
        soup = BeautifulSoup(html_table, 'html.parser')
        rows = soup.findAll("tr")
        for row in rows:
            cells = row.find_all('td')
            try:
                item_due = cells[3].text
                item_parts = cells[2].find_all('div')
                # for i, item in enumerate(cells):
                #     print(f'part {i}: {item.text}')
                item_title = item_parts[0].text
                item_author = item_parts[1].text
                l.append(
                    f'Hold on {item_title} by {item_author} is in transit.')
            except IndexError:
                pass

    # look at those still on hold
    try:
        still_on_hold = driver.find_element_by_class_name('still-on-hold')
    except NoSuchElementException:
        still_on_hold = False
    if still_on_hold:
        item_table = still_on_hold.find_element_by_class_name('item-list')
        html_table = item_table.get_attribute('outerHTML')
        soup = BeautifulSoup(html_table, 'html.parser')
        rows = soup.findAll("tr")
        for row in rows:
            cells = row.find_all('td')
            try:
                item_due = cells[3].text
                item_parts = cells[2].find_all('div')
                # for i, item in enumerate(cells):
                #     print(f'part {i}: {item.text}')
                item_title = item_parts[0].text
                item_author = item_parts[1].text
                item_position = cells[3].text
                item_status = cells[5].text
                l.append(
                    f'Hold on {item_title} by {item_author} is {item_status}. Position: {item_position}.'
                )
                # item_date_due = parse(item_due, timezone=tz)
                # how_long = item_date_due - Delorean(timezone=tz)
                # item_name = cells[2].text
                # print(f'{item_title} by {item_author} is due in {how_long.days} days on {item_due}')
                # print(cells)
            except IndexError:
                pass

    driver.close()

    s = 'Next:'
    if item_table:
        s += f' Due: {first_due_days}'
    if media_list:
        s += f' York: {first_york_due_days}'
    if ready_for_pickup:
        s += f' Holds: {first_hold_days}'
    if item_table or ready_for_pickup or media_list:
        safe_print(s)
    else:
        safe_print('Nothing Due or Ready for Pickup')
    for line in l:
        safe_print(line)
else:
    # wait for instance deletion
    while deleted_db.status != 'deleted':
        print "waiting for db deletion..."
        time.sleep(120)
        try:
            deleted_db.update()
        except boto.exception.BotoServerError as e:
            if e.reason != 'Not Found':
                raise e
            # instance is finally gone
            break

# print "creating snapshot from {}...".format(source_db)
# rds_connection.create_dbsnapshot(snapshot, source_db)
#
# # wait for it to become available
# # about 2m
# while rds_connection.get_all_dbsnapshots(
#         snapshot_id=snapshot)[0].status != 'available':
#     print "waiting for snapshot availability..."
#     time.sleep(60)

# instead of creating a new snapshot use the latest extant one (and
# rely on something else to create snapshots)
snapshots = rds_connection.get_all_dbsnapshots(instance_id=source_db)
if not snapshots:
    exit('{} has no snapshots'.format(source_db))
snapshot = max(snapshots, key=lambda x: parse(x.snapshot_create_time)).id
def function_data_sub(t):
    if not isinstance(t, ParseResults):
        raise TypeError('should be ParseResults type')
    cur_date, num, tp = t[0], t[1], t[2]
    # e.g. tp == 'MONTH' resolves to parse(cur_date).last_month(num)
    d = getattr(parse(cur_date), 'last_%s' % tp.lower())(int(num))
    return d.datetime.strftime("%Y-%m-%d %H:%M:%S")
def test_parse_with_timezone(self):
    d1 = delorean.parse("2011/01/01 00:00:00 -0700")
    d2 = datetime(2011, 1, 1, 7, 0)
    d2 = utc.localize(d2)
    self.assertEqual(d2, d1.datetime)
    self.assertEqual(utc, timezone(d1._tz))
def create_question():
    if not get_is_logged_in(c):
        print("requested to add a question with no login")
        b.redirect("/")

    question = req.POST.get("question").strip()
    is_multiple_choice = req.POST.get("is-multiple-choice") == "on"
    ask_time = req.POST.get("ask-time")
    ask_time = dmc.parse(ask_time, UK_TZ, dayfirst=False)
    ask_time = ask_time.shift("UTC")

    # question length check
    if len(question) + len("question x: ") > MAX_TWEET_LEN:
        print("question too long")
        b.redirect("/questions?" + urlencode({"message": "Question too long"}))

    # question existence check
    if not question:
        print("no question!")
        b.redirect("/questions?" + urlencode({"message": "No question has been provided"}))

    print("\napparently..")
    print(" question is", question)
    print(" multiple choice is", is_multiple_choice)
    print(" ask time is", ask_time, ask_time.epoch)

    if is_multiple_choice:
        print("is multiple choice")
        answers = {
            "a": req.POST.get("answer-a"),
            "b": req.POST.get("answer-b"),
            "c": req.POST.get("answer-c"),
            "d": req.POST.get("answer-d"),
        }
        answers = {letter: answers[letter] for letter in answers if answers[letter].strip()}
        answers_list = list(answers.values())
        correct_answer = req.POST.get("correct-answer")
        print(" answers are", answers, "(" + str(answers_list) + ")")
        print(" correct answer is", correct_answer)

        # answers length check
        for answer in answers_list:
            if len(answer) + len("is it x: ?") > MAX_TWEET_LEN:
                print("possible answer too long")
                b.redirect("/questions?" + urlencode({"message": "Possible answer too long"}))

        # answers quantity check
        if len(answers) < 2:
            b.redirect("/questions?" + urlencode({"message": "Not enough answers given - two or more needed"}))

        # answers duplicate check
        if len(answers) != len(set(answers_list)):
            b.redirect("/questions?" + urlencode({"message": "Duplicate answers given"}))

        # correct answer existence check
        if not correct_answer:
            b.redirect("/questions?" + urlencode({"message": "No correct answer specified"}))

        # check for matching correct answer
        if (
            (correct_answer == "a" and not answers.get("a"))
            or (correct_answer == "b" and not answers.get("b"))
            or (correct_answer == "c" and not answers.get("c"))
            or (correct_answer == "d" and not answers.get("d"))
        ):
            print(
                "no matching correct answer! correct_answer is",
                correct_answer, "and answers are", answers
            )
            b.redirect("/questions?" + urlencode({"message": "The correct answer specified does not exist"}))

    # insert question
    c.execute("INSERT INTO questions (ask_time, question) VALUES (?, ?)", (ask_time.epoch, question))
    question_id = c.lastrowid
    print("inserted question with id", question_id)

    if is_multiple_choice:
        for letter in answers:
            # insert each possible answer
            c.execute("""
                INSERT INTO possible_answers (question_id, letter, answer)
                VALUES (?, ?, ?);
            """, (
                question_id,
                letter,
                answers[letter],
            ))
            possible_answer_id = c.lastrowid
            print("inserted possible_answer with id", possible_answer_id)

            if letter == correct_answer:
                # store the correct answer id
                c.execute(
                    "UPDATE questions SET possible_answer_id = ? WHERE id = ?",
                    (possible_answer_id, question_id)
                )
                print("updated question with possible_answer_id", possible_answer_id)

    conn.commit()
    b.redirect("/questions")
def isodate(string):
    return delorean.parse(string)
parser.add_argument('-f', type=argparse.FileType('r'))
args = parser.parse_args()

application = create_app()
application.app_context().push()

csv_reader = csv.DictReader(args.f, delimiter='\t')
for row in csv_reader:
    # Check the Product
    product = Product.query.get(row['id'])
    if not product:
        product = Product(id=row['id'], description=row['description'])
        db.session.add(product)

    # Parse to get the time in UTC
    timestamp = parse(row['datetime'])
    timestamp.shift('UTC')

    location = Location(product_id=row['id'],
                        timestamp=timestamp.datetime,
                        latitude=row['latitude'],
                        longitude=row['longitude'],
                        elevation=row['elevation'])
    db.session.add(location)

db.session.commit()

# We need to update the sequence for the products, as it has become
# out of date
db.session.execute('''
    SELECT setval('product_id_seq',
def get_waf_metrics():
    # Fetching WAF data has the potential of taking ages to complete.
    # As this would keep the exporter from gathering any other data,
    # introduce an option to just not run it.
    if not os.environ.get('ENABLE_WAF'):
        logging.info('Fetching WAF data is disabled')
        return ''

    path_format = '%szones/%s/firewall/events?per_page=50%s'
    zone_id = get_zone_id()
    window_start_time = delorean.now().epoch
    window_end_time = window_start_time - 60
    records = []
    next_page_id = ''
    logging.info('Fetching WAF event data starting at %s, going back 60s'
                 % delorean.epoch(window_start_time).format_datetime())
    while next_page_id is not None:
        url = path_format % (ENDPOINT, zone_id, next_page_id)
        r = get_data_from_cf(url=url)

        if 'success' not in r or not r['success']:
            logging.error('Failed to get information from Cloudflare')
            for error in r['errors']:
                logging.error('[%s] %s' % (error['code'], error['message']))
            return ''

        if r['result_info']['next_page_id']:
            next_id = r['result_info']['next_page_id']
            logging.debug('Set next_page_id to %s' % next_id)
            next_page_id = ('&next_page_id=%s' % next_id)
        else:
            next_page_id = None

        for event in r['result']:
            occurred_at = event['occurred_at']
            occurrence_time = delorean.parse(occurred_at).epoch
            logging.debug('Occurred at: %s (%s)' % (occurred_at, occurrence_time))
            if occurrence_time <= window_end_time:
                logging.debug('Window end time reached, breaking')
                next_page_id = None
                break
            logging.debug('Adding WAF event')
            records.append(event)

        now = delorean.now().epoch
        logging.info('%d WAF events found (took %g seconds so far)'
                     % (len(records), now - window_start_time))
        if now - window_start_time > 55:
            logging.warning('Too many WAF events, skipping (metrics affected)')
            next_page_id = None

    return wafexporter.process(records)