def handle_feed_submission_response(merchant, response, feed_xml=None):
    """
    Processes the response received from MWS when submitting an XML feed.

    It creates a new :class:`FeedSubmission <oscar_mws.models.FeedSubmission>`
    from the ID in the *response* as well as submission metadata such as the
    feed submission date. If the original XML is passed in as *feed_xml* it is
    stored with the feed submission for reference and easier debugging.

    :param MerchantAccount merchant: the merchant account the feed was
        submitted to.
    :param response: the original response dict returned by MWS.
    :param str feed_xml: XML submitted to MWS or None
    """
    fsinfo = response.FeedSubmissionInfo
    try:
        submission = FeedSubmission.objects.get(
            submission_id=fsinfo.FeedSubmissionId,
            date_submitted=du_parse(fsinfo.SubmittedDate),
            feed_type=fsinfo.FeedType,
        )
    except FeedSubmission.DoesNotExist:
        submission = FeedSubmission(
            submission_id=fsinfo.FeedSubmissionId,
            date_submitted=du_parse(fsinfo.SubmittedDate),
            feed_type=fsinfo.FeedType,
        )

    if feed_xml:
        submission.feed_xml = feed_xml

    submission.merchant = merchant
    submission.processing_status = fsinfo.FeedProcessingStatus
    submission.save()

    logger.info(
        "Feed submission successful as ID {}".format(submission.submission_id))
    return submission
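# A hedged aside on the date handling above: MWS returns ISO-8601 timestamps,
# and du_parse (dateutil's parser) turns them into timezone-aware datetimes.
# This is standard dateutil behaviour, shown here with a sample value only.
from dateutil.parser import parse as du_parse

submitted = du_parse("2013-10-29T00:50:03Z")
print(submitted)         # 2013-10-29 00:50:03+00:00
print(submitted.tzinfo)  # tzutc()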
def cancel_submission(submission):
    merchant = submission.merchant
    feeds_api = get_merchant_connection(merchant.seller_id).feeds
    response = feeds_api.cancel_feed_submissions(
        feedids=[submission.submission_id]
    ).parsed

    result = response.get('FeedSubmissionInfo')
    try:
        submission = FeedSubmission.objects.get(
            submission_id=result.FeedSubmissionId,
            date_submitted=du_parse(result.SubmittedDate),
            feed_type=result.FeedType,
        )
    except FeedSubmission.DoesNotExist:
        submission = FeedSubmission(
            submission_id=result.FeedSubmissionId,
            date_submitted=du_parse(result.SubmittedDate),
            feed_type=result.FeedType,
        )

    # The processing status is unchanged: nothing to update, return early.
    # (With the comparison inverted, a changed status would be discarded
    # without ever being saved.)
    if submission.processing_status == result.FeedProcessingStatus:
        return submission

    submission.merchant = merchant
    submission.processing_status = result.FeedProcessingStatus
    submission.save()
    return submission
def list_submitted_feeds(merchants=None):
    if not merchants:
        merchants = MerchantAccount.objects.all()

    feed_info = {}
    for merchant in merchants:
        feeds_api = get_merchant_connection(merchant.seller_id).feeds
        response = feeds_api.get_feed_submission_list()

        feed_info[merchant.seller_id] = []
        for feed in response.GetFeedSubmissionListResult.FeedSubmissionInfo:
            # The processing dates are absent until MWS picks the feed up;
            # du_parse raises ValueError on an empty string (dateutil >= 2.5),
            # so only parse the dates that are actually present.
            feed_info[merchant.seller_id].append({
                'submission_id': feed.FeedSubmissionId,
                'feed_type': feed.FeedType,
                'status': feed.FeedProcessingStatus,
                'date_submitted': (
                    du_parse(feed.SubmittedDate)
                    if feed.get('SubmittedDate') else None),
                'date_processing_started': (
                    du_parse(feed.StartedProcessingDate)
                    if feed.get('StartedProcessingDate') else None),
                'date_processing_ended': (
                    du_parse(feed.CompletedProcessingDate)
                    if feed.get('CompletedProcessingDate') else None),
            })
    return feed_info
def regular_season_player_averages_within_date_range(
        player_identifier, season_end_year, from_date, to_date,
        output_type=None, output_file_path=None, output_write_option=None,
        json_options=None):
    try:
        values = http_client.regular_season_player_averages_within_date_range(
            player_identifier=player_identifier,
            season_end_year=season_end_year,
            from_date=du_parse(from_date),
            to_date=du_parse(to_date))
    except requests.exceptions.HTTPError as http_error:
        if http_error.response.status_code in (
                requests.codes.internal_server_error,
                requests.codes.not_found):
            raise InvalidPlayerAndSeason(player_identifier=player_identifier,
                                         season_end_year=season_end_year)
        else:
            raise http_error
    return output(
        values=values,
        output_type=output_type,
        output_file_path=output_file_path,
        output_write_option=output_write_option,
        csv_writer=CSVWriter(
            column_names=PLAYER_SEASON_AVERAGES_WITHIN_DATE_RANGE_COLUMN_NAMES,
            row_formatter=RowFormatter(
                data_field_names=PLAYER_SEASON_AVERAGES_WITHIN_DATE_RANGE_COLUMN_NAMES)),
        json_options=json_options,
    )
def test_start_end_dates(self):
    """ a simple start/end date tile """
    tile_no_tz = {
        "imageURI": "data:image/png;base64,somedata",
        "url": "https://somewhere.com",
        "title": "Some Title",
        "type": "organic",
        "bgColor": "#FFFFFF",
        "time_limits": {
            "start": "2014-01-12T00:00:00.000",
            "end": "2014-01-31T00:00:00.000"
        }
    }
    dist = {"US/en-US": [tile_no_tz]}

    c = self.env.db.session.query(Adgroup).count()
    assert_equal(30, c)

    data = ingest_links(dist, self.channels[0].id)
    assert_equal(1, len(data["US/en-US"]))

    c = self.env.db.session.query(Adgroup).count()
    assert_equal(31, c)

    tile = self.env.db.session.query(Tile).filter(Tile.id == 31).one()
    ag = self.env.db.session.query(Adgroup).filter(Adgroup.id == 31).one()
    assert_equal(tile.adgroup_id, ag.id)
    assert_equal(ag.start_date, dist["US/en-US"][0]['time_limits']['start'])
    assert_equal(ag.end_date, dist["US/en-US"][0]['time_limits']['end'])
    assert_equal(ag.start_date_dt,
                 du_parse(dist["US/en-US"][0]['time_limits']['start']))
    assert_equal(ag.end_date_dt,
                 du_parse(dist["US/en-US"][0]['time_limits']['end']))
def store_input(input_file, session):
    input_file = import_file(input_file)
    client_id = int(input_file.client_id)
    with session.begin():
        contract = None
        contract_id = None  # stays None when the input has no valid contract
        try:
            contract_id = int(input_file.contract_id)
            contract = session.query(Contract).filter(
                Contract.id == contract_id).first()
        except (AttributeError, TypeError, ValueError):
            pass
        # The input may or may not carry a currency.
        currency = getattr(input_file, 'currency', None)
        client = session.query(Client).filter(Client.id == client_id).first()
        invoice_date = date.today()  # don't shadow datetime.date
        total_price = sum(t['price'] for t in input_file.tasks)
        invoice = InvoiceSent(client_id=client_id,
                              date=invoice_date,
                              taxes=0,
                              total=total_price,
                              pre_tax=total_price,
                              from_date=du_parse(input_file.from_date),
                              to_date=du_parse(input_file.to_date),
                              currency=currency)
        session.add(invoice)
        tasks = [Task(client_id=client_id,
                      contract_id=contract_id,
                      invoice=invoice,
                      date=du_parse(task['date']),
                      time_amount=task['time_amount'],
                      price=task['price'],
                      detail=task['detail'])
                 for task in input_file.tasks]
        session.add_all(tasks)
        return invoice, tasks, client, contract
def _parse_condition(task_str):
    """Extract the condition, if any, from the task string."""
    keyword, arg, _task = task_str.split(' ', 2)
    if keyword in ['by', 'before']:
        return lambda t: t <= du_parse(arg)
    if keyword in ['after']:
        return lambda t: t >= du_parse(arg)
    return lambda _t: True
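# Usage sketch for _parse_condition above (assumes du_parse is dateutil's
# parse, as elsewhere in this collection); the task string is made up.
from datetime import datetime

cond = _parse_condition("by 2024-06-01 file the report")
print(cond(datetime(2024, 5, 30)))  # True  -- on or before the deadline
print(cond(datetime(2024, 6, 2)))   # False -- past the deadline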
def test_parses_the_response_correctly(self):
    xml_data = self.load_data('get_fulfillment_order_response.xml')
    httpretty.register_uri(
        httpretty.GET,
        'https://mws.amazonservices.com/FulfillmentOutboundShipment/2010-10-01',
        body=xml_data,
    )
    basket = factories.BasketFactory()
    basket.add_product(factories.ProductFactory())
    order = create_order(basket=basket)

    update_fulfillment_order(
        factories.FulfillmentOrderFactory(order=order)
    )

    fulfillment_order = FulfillmentOrder.objects.all()[0]
    self.assertEquals(FulfillmentOrder.objects.count(), 1)
    self.assertEquals(fulfillment_order.status, 'COMPLETE')

    shipments = FulfillmentShipment.objects.all()
    self.assertEquals(len(shipments), 1)

    expected = {
        'Dkw.3ko298': {
            'shipment_id': 'Dkw.3ko298',
            'status': 'SHIPPED',
            'fulfillment_center_id': 'FCID01',
            'date_shipped': du_parse('2013-10-29T00:50:03Z'),
            'date_estimated_arrival': du_parse('2013-10-30T23:59:59Z'),
        },
    }
    for shipment in shipments:
        for attr, value in expected[shipment.shipment_id].iteritems():
            self.assertEquals(getattr(shipment, attr), value)

    packages = ShipmentPackage.objects.all()
    self.assertEquals(len(packages), 1)
    self.assertEquals(packages[0].tracking_number, 'MPT_1234')
    self.assertEquals(packages[0].carrier_code, 'Magic Parcels')

    shipping_events = ShippingEvent.objects.all()
    self.assertEquals(len(shipping_events), 1)
    self.assertItemsEqual(
        [s.notes for s in shipping_events],
        ['* Shipped package via Magic Parcels with tracking number '
         'MPT_1234']
    )
def test_parse_dates_custom_euro_format(all_parsers, kwargs):
    parser = all_parsers
    data = """foo,bar,baz
31/01/2010,1,2
01/02/2010,1,NA
02/02/2010,1,2
"""
    if "dayfirst" in kwargs:
        df = parser.read_csv(
            StringIO(data),
            names=["time", "Q", "NTU"],
            date_parser=lambda d: du_parse(d, **kwargs),
            header=0,
            index_col=0,
            parse_dates=True,
            na_values=["NA"],
        )
        exp_index = Index(
            [datetime(2010, 1, 31), datetime(2010, 2, 1),
             datetime(2010, 2, 2)],
            name="time",
        )
        expected = DataFrame(
            {"Q": [1, 1, 1], "NTU": [2, np.nan, 2]},
            index=exp_index,
            columns=["Q", "NTU"],
        )
        tm.assert_frame_equal(df, expected)
    else:
        msg = "got an unexpected keyword argument 'day_first'"
        with pytest.raises(TypeError, match=msg), \
                tm.assert_produces_warning(FutureWarning):
            parser.read_csv(
                StringIO(data),
                names=["time", "Q", "NTU"],
                date_parser=lambda d: du_parse(d, **kwargs),
                skiprows=[0],
                index_col=0,
                parse_dates=True,
                na_values=["NA"],
            )
def process_time(self, reference_date, time_string):
    dt = du_parse('%s %s' % (
        reference_date.strftime('%Y-%m-%d'),
        time_string.replace(u'\xa0', ' ').strip()))
    # Times between midnight and 5 AM belong to the following calendar day
    # (e.g. a park that closes at 1 AM "tonight" closes on tomorrow's date).
    if datetime.time(0, 0) <= dt.time() <= datetime.time(5, 0):
        dt += datetime.timedelta(days=1)
    return dt
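# A standalone sketch of the rollover rule above, runnable on its own;
# the reference date is an arbitrary example value.
from dateutil.parser import parse as du_parse
import datetime

ref = datetime.date(2014, 1, 12)
dt = du_parse('%s %s' % (ref.strftime('%Y-%m-%d'), '1:00 AM'))
if datetime.time(0, 0) <= dt.time() <= datetime.time(5, 0):
    dt += datetime.timedelta(days=1)
print(dt)  # 2014-01-13 01:00:00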
def parse(self, timespec: str) -> List[Tuple[float, datetime]]:
    try:
        # TODO: This clipping is suboptimal, e.g. 2020 should probably be
        # 2020-01-01, but 'Tuesday' should still be pinned to the current
        # date. Find out the actual parse spec used and adjust.
        reference = self.now.replace(hour=0, minute=0, second=0,
                                     microsecond=0, tzinfo=self.timezone)
        parse: datetime
        skipped: Tuple[str, ...]  # variable-length tuple of skipped tokens
        parse, skipped = du_parse(
            timespec,  # type: ignore
            default=reference,
            fuzzy=True,
            fuzzy_with_tokens=True)
        # Slightly discount magic w.r.t. FormatStringStrategy; ignore single
        # whitespace tokens.
        confidence = (1 / 2)**len([tok for tok in skipped if tok != ' ']) * 99.
        return [(confidence, parse.astimezone(self.timezone))]
    except ValueError:
        return []
    except OverflowError:
        return []
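# Hedged illustration of the fuzzy_with_tokens call used above: dateutil
# returns the parsed datetime plus the tokens it skipped, which the method
# above folds into a confidence score. Sample values only; the exact token
# split can vary between dateutil versions.
from datetime import datetime
from dateutil.parser import parse as du_parse

dt, skipped = du_parse("meet me on 2021-03-04 at 10am",
                       default=datetime(2021, 1, 1),
                       fuzzy=True, fuzzy_with_tokens=True)
print(dt)       # 2021-03-04 10:00:00
print(skipped)  # e.g. ('meet me on ', 'at ')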
def __init__(self, t, k, name, url, created_time):
    super(MixCloudItem, self).__init__()
    self.type = t
    self.key = k
    self.name = name
    self.url = url
    self.created_time = du_parse(created_time)
def deserialize(self, serialized, **kwargs):
    value = None
    if isinstance(serialized, (float, int)):
        value = datetime.fromtimestamp(serialized)
        self.set_value(value, **kwargs)
    elif isinstance(serialized, six.string_types):
        value = du_parse(serialized)
        self.set_value(value, **kwargs)
def update_feed_submissions(merchant):
    """
    Check the MWS API for updates on previously submitted feeds.

    All submissions stored in the database for *merchant* that are not
    _DONE_ or _CANCELLED_ are requested.

    Returns list of updated ``FeedSubmission`` instances.
    """
    submissions = FeedSubmission.objects.exclude(
        processing_status__in=[am.STATUS_DONE, am.STATUS_CANCELLED],
        merchant=merchant
    )
    feeds_api = get_merchant_connection(merchant.seller_id).feeds
    response = feeds_api.get_feed_submission_list(
        feedids=[s.submission_id for s in submissions] or None
    ).parsed

    if response.HasNext:
        # TODO: handle pagination by requesting the next page with
        # response.NextToken.
        pass

    updated_feeds = []
    for result in response.get_list('FeedSubmissionInfo'):
        try:
            submission = FeedSubmission.objects.get(
                submission_id=result.FeedSubmissionId,
                date_submitted=du_parse(result.SubmittedDate),
                feed_type=result.FeedType,
            )
        except FeedSubmission.DoesNotExist:
            submission = FeedSubmission(
                submission_id=result.FeedSubmissionId,
                date_submitted=du_parse(result.SubmittedDate),
                feed_type=result.FeedType,
            )

        if submission.processing_status != result.FeedProcessingStatus:
            updated_feeds.append(submission)

        submission.merchant = merchant
        submission.processing_status = result.FeedProcessingStatus
        submission.save()
    return updated_feeds
def parse_to_utc_notz(dt_str):
    """ Return a TZ unaware dt in UTC """
    dt = du_parse(dt_str)
    if dt.tzinfo:
        dt = dt.astimezone(pytz.utc).replace(tzinfo=None)
    return dt
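# Quick check of parse_to_utc_notz: an offset-aware input is converted to
# UTC and stripped of tzinfo, while a naive input passes through unchanged.
# Sample timestamps only.
print(parse_to_utc_notz("2013-10-29T00:50:03-04:00"))  # 2013-10-29 04:50:03
print(parse_to_utc_notz("2013-10-29 00:50:03"))        # 2013-10-29 00:50:03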
def crawl_ip3366(url):
    base_url = f"{url}&page=%s"
    index = 1
    time_now = datetime.now()
    continue_flag = True
    while continue_flag:
        url = base_url % index
        resp = session.request('get', url)
        sel = Selector(text=resp.data.decode('gb2312'))
        lst = sel.xpath(
            '//table[@class="table table-bordered table-striped"]/tbody/tr')
        if not lst:
            continue_flag = False
        for item in lst:
            ip = item.xpath('./td[1]/text()').extract_first()
            port = item.xpath('./td[2]/text()').extract_first()
            _anonymous = item.xpath('./td[3]/text()').extract_first()
            if _anonymous == '高匿代理IP':
                anonymous = '高匿'
            elif _anonymous == '普通代理IP':
                anonymous = '普匿'
            else:
                anonymous = '透明'
            # avoid shadowing the builtin `type`
            proxy_type = item.xpath('./td[4]/text()').extract_first()
            address = item.xpath('./td[5]/text()').extract_first()
            speed = item.xpath('./td[6]/text()').extract_first()
            _last_check_time = item.xpath('./td[7]/text()').extract_first()
            last_check_time = du_parse(_last_check_time)
            if not within_delta(time_now, last_check_time, timedelta(days=1)):
                continue_flag = False
            print(f"ip: {ip}, port: {port}, address: {address}, "
                  f"type: {proxy_type}, speed: {speed}, "
                  f"last_check_time: {last_check_time}")
            proxies = {
                'http': f'http://{ip}:{port}',
                'https': f'http://{ip}:{port}'
            }
            proxy_info = {
                'ip': ip,
                'port': port,
                'address': address,
                'type': proxy_type,
                'anonymous': anonymous,
                'speed': speed,
                'connect_time': None,
                'living_time': None,
                'last_check_time': last_check_time,
                'source': 'ip3366'
            }
            tasks.append(proxies_checker(proxies, proxy_info))
        index += 1
def main():
    hours = load_pickles('disneyland-hours.pickle')
    events = load_pickles('disneyland-events.pickle')
    passes = load_pickles('disneyland-passes.pickle')
    forecast = get_forecast()
    spreadsheet = petl.fromcsv(
        'https://spreadsheets.google.com/tq?key=%s&gid=0&tqx=out:csv'
        % DISNEY_SPREADSHEET_KEY
    )

    events_lookup = {}
    for item in events:
        buff = events_lookup.get(item['date'].date(), {})
        buff[item['park']] = item
        buff['date'] = item['date'].date()
        events_lookup[item['date'].date()] = buff

    for item in spreadsheet.dicts():
        sheet_date = du_parse(item['date']).date()
        if sheet_date in events_lookup:
            e = events_lookup[sheet_date]
            e['disneyland']['crowd_level'] = item['disneyland_crowd_level']
            e['disneyland']['closures'] = [x for x in map(
                unicode.strip, item['disneyland_closures'].split(',')) if x]
            e['disney-california-adventure']['crowd_level'] = \
                item['california_adventure_crowd_level']
            e['disney-california-adventure']['closures'] = \
                [x for x in map(
                    unicode.strip,
                    item['california_adventure_closures'].split(',')) if x]

    for item in hours:
        if item['date'].date() in events_lookup:
            events_lookup[item['date'].date()][item['park']]['hours'] = item

    for item in passes:
        if item['date'] in events_lookup:
            events_lookup[item['date']]['passes'] = item

    for date, item in forecast.items():
        if date.date() in events_lookup:
            events_lookup[date.date()]['forecast'] = item

    f = StringIO()
    from pprint import pprint
    pprint(events_lookup)  # debug dump of the merged lookup
    pickle.dump(sorted(events_lookup.values(), key=lambda x: x['date']), f)
    s3_save(f, 'disneyland-merged_data.pickle')
    f.seek(0)
    s3_save(f, 'disney-california-adventure-merged_data.pickle')
def deserialize(self, serialized, **kwargs):
    value = None
    # convert to float if serialized is digit
    if isinstance(serialized, str) and serialized.isdigit():
        serialized = float(serialized)
    if isinstance(serialized, (float, int)):
        value = datetime.fromtimestamp(serialized)
        self.set_value(value, **kwargs)
    elif isinstance(serialized, str):
        value = du_parse(serialized)
        self.set_value(value, **kwargs)
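# Sketch of the two input shapes the deserializer above accepts: a numeric
# Unix timestamp (possibly arriving as a digit string) or a date string.
# Sample values only.
from datetime import datetime
from dateutil.parser import parse as du_parse

print(datetime.fromtimestamp(1389484800))   # epoch seconds, local time
print(du_parse("2014-01-12T00:00:00.000"))  # 2014-01-12 00:00:00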
def crawl_89ip():
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'
    }
    base_url = 'http://www.89ip.cn/index_%s.html'
    index = 1
    time_now = datetime.now()
    continue_flag = True
    while continue_flag:
        url = base_url % index
        resp = session.request('get', url, headers=headers)
        sel = Selector(text=resp.data.decode('utf8'))
        lst = sel.xpath('//table[@class="layui-table"]/tbody/tr')
        if not lst:
            continue_flag = False
        for item in lst:
            ip = item.xpath('normalize-space(./td[1])').extract_first()
            port = item.xpath('normalize-space(./td[2])').extract_first()
            address = item.xpath('normalize-space(./td[3])').extract_first()
            _last_check_time = item.xpath(
                'normalize-space(./td[5])').extract_first()
            last_check_time = du_parse(_last_check_time)
            if not within_delta(time_now, last_check_time, timedelta(days=1)):
                continue_flag = False
            print(f"ip: {ip}, port: {port}, address: {address}, "
                  f"last_check_time: {last_check_time}")
            proxies = {
                'http': f'http://{ip}:{port}',
                'https': f'http://{ip}:{port}'
            }
            proxy_info = {
                'ip': ip,
                'port': port,
                'address': address,
                'type': 'HTTP',
                'anonymous': '未知',
                'speed': None,
                'connect_time': None,
                'living_time': None,
                'last_check_time': last_check_time,
                'source': '89ip'
            }
            tasks.append(proxies_checker(proxies, proxy_info))
        index += 1
    loop.run_until_complete(asyncio.wait(tasks))
    conn.commit()
def load_data(slug):
    """Load the data."""
    tpl_url = 'https://gifs.mylesb.ca{}'

    data = {
        'slug': slug,
        'image_url': tpl_url.format(url_for('views.gif_image', slug=slug)),
        'html_url': tpl_url.format(url_for('views.gif_detail', slug=slug)),
        'json_url': tpl_url.format(url_for('views.gif_detail_json', slug=slug))
    }

    gif_file = join(current_app.config['GIFS_PATH'], '{}.gif'.format(slug))
    mp4_file = join(current_app.config['GIFS_PATH'], '{}.mp4'.format(slug))
    webp_file = join(current_app.config['GIFS_PATH'], '{}.webp'.format(slug))

    # If the gif doesn't exist, raise an error.
    if not exists(gif_file):
        raise Exception

    if exists(mp4_file):
        data['mp4'] = True
        data['mp4_url'] = tpl_url.format(
            url_for('views.gif_image_mp4', slug=slug))

    if exists(webp_file):
        data['webp'] = True
        data['webp_url'] = tpl_url.format(
            url_for('views.gif_image_webp', slug=slug))

    image = Image.open(gif_file)
    data['width'], data['height'] = image.size

    yaml_file = join(current_app.config['GIFS_PATH'], '{}.yml'.format(slug))
    if exists(yaml_file):
        with open(yaml_file) as fobj:
            meta = yaml.safe_load(fobj.read())
    else:
        meta = {}

    if not meta.get('date'):
        meta['date'] = datetime.fromtimestamp(getmtime(gif_file))

    if not isinstance(meta.get('date'), datetime):
        meta['date'] = du_parse(meta['date'])

    meta['sort'] = meta['date'].isoformat()
    data.update(meta)

    return data
def qy():
    base_url = 'http://www.qydaili.com/free/?action=china&page=%s'
    index = 1
    time_now = datetime.now()
    continue_flag = True
    while continue_flag:
        url = base_url % index
        resp = session.request('get', url)
        sel = Selector(text=resp.data.decode('utf8'))
        lst = sel.xpath('//div[@class="container"]/table/tbody/tr')
        if not lst:
            continue_flag = False
        for item in lst:
            ip = item.xpath('./td[1]/text()').extract_first()
            port = item.xpath('./td[2]/text()').extract_first()
            _anonymous = item.xpath('./td[3]/text()').extract_first()
            anonymous = _anonymous if _anonymous != '匿名' else '普匿'
            # avoid shadowing the builtin `type`
            proxy_type = item.xpath('./td[4]/text()').extract_first()
            address = item.xpath('./td[5]/text()').extract_first()
            speed = item.xpath('./td[6]/text()').extract_first()
            _last_check_time = item.xpath('./td[7]/text()').extract_first()
            last_check_time = du_parse(_last_check_time)
            if not within_delta(time_now, last_check_time, timedelta(days=1)):
                continue_flag = False
            print(f"ip: {ip}, port: {port}, address: {address}, "
                  f"type: {proxy_type}, speed: {speed}, "
                  f"last_check_time: {last_check_time}")
            proxies = {
                'http': f'http://{ip}:{port}',
                'https': f'http://{ip}:{port}'
            }
            proxy_info = {
                'ip': ip,
                'port': port,
                'address': address,
                'type': proxy_type,
                'anonymous': anonymous,
                'speed': speed,
                'connect_time': None,
                'living_time': None,
                'last_check_time': last_check_time,
                'source': 'qiyun'
            }
            tasks.append(proxies_checker(proxies, proxy_info))
        index += 1
    loop.run_until_complete(asyncio.wait(tasks))
    conn.commit()
def parse(self, response):
    for month in response.css('.calendar'):
        self.log('Scraping: %s' % month.xpath(
            'div[contains(@class, "month")]/text()').extract()[0])
        for day in month.xpath(
                'div[contains(@class, "day") and @original-title]'
                '/@original-title').extract():
            sel = Selector(text='<div>%s</div>' % day)
            date_string = sel.xpath('//div/strong/text()').extract()[0]
            self.log('Parsed date: %s' % date_string)

            time_range_selection = sel.xpath('//div/text()').extract()
            if not time_range_selection:
                continue
            self.log('Time range: %s' % time_range_selection[0])

            if time_range_selection[0] == 'Closed':
                # Parse this day's own date; start_date would otherwise be
                # unset (or stale from a previous iteration) here.
                yield ParkHours(
                    park=self.park_name,
                    date=du_parse(date_string).date())
                continue

            start, end = time_range_selection[0].replace(
                'Midnight', '12AM').split('-')
            start_date = du_parse('%s %s' % (date_string, start))
            end_date = du_parse('%s %s' % (date_string, end))
            yield ParkHours(
                park=self.park_name,
                date=start_date.date(),
                open_time=start_date.time(),
                close_time=end_date.time())
def parse_month(self, response):
    for day in response.css('#monthlyCalendar td'):
        datelink = day.css('.dayOfMonth').xpath('a/@href')
        if not datelink:
            # Day of month is empty, skip
            continue
        date = du_parse(datelink.extract()[0].split('/')[-1])
        for park in day.css('.cellSection'):
            open_time, close_time = park.css('.parkHours').xpath(
                'text()').extract()[0].split('(')[0].split('-')
            yield ParkHours(
                park=park_name_lookup[
                    park.css('.parkName').xpath('text()').extract()[0]],
                date=date,
                open_time=self.parse_time(open_time),
                close_time=self.parse_time(close_time))
def parseAction(s, l, t):
    if t[0] in ('on', 'at'):
        if t[0] == "on":
            datestr = "%s at %s" % (t[1], "".join(t[3:]))
        else:
            datestr = "".join(t[1:])
        self._e.event.time = du_parse(datestr)
    else:
        now = datetime.datetime.now()
        if t[-1] in 'mM':
            self._e.event.time = now + datetime.timedelta(minutes=int(t[1]))
        elif t[-1] in 'hH':
            self._e.event.time = now + datetime.timedelta(hours=int(t[1]))
        elif t[-1] in 'dD':
            self._e.event.time = now + datetime.timedelta(days=int(t[1]))
    if self._e.event.time < datetime.datetime.now():
        self._e.event.time += datetime.timedelta(days=1)
    return ""
def parse(box_scores, from_date, to_date):
    return [{
        "MP": get_seconds(box_score.playing_time),
        "FGM": str_to_int(box_score.made_field_goals),
        "FGA": str_to_int(box_score.attempted_field_goals),
        "3PM": str_to_int(box_score.made_three_point_field_goals),
        "3PA": str_to_int(box_score.attempted_three_point_field_goals),
        "FTM": str_to_int(box_score.made_free_throws),
        "FTA": str_to_int(box_score.attempted_free_throws),
        "ORB": str_to_int(box_score.offensive_rebounds),
        "DRB": str_to_int(box_score.defensive_rebounds),
        "TRB": str_to_int(box_score.total_rebounds),
        "AST": str_to_int(box_score.assists),
        "STL": str_to_int(box_score.steals),
        "BLK": str_to_int(box_score.blocks),
        "TOV": str_to_int(box_score.turnovers),
        "PF": str_to_int(box_score.personal_fouls),
        "PTS": str_to_int(box_score.points_scored),
        "GmSc": str_to_float(box_score.game_score),
        "+/-": str_to_int(box_score.plus_minus),
    } for box_score in box_scores
        if box_score.is_active
        and from_date <= du_parse(box_score.date) < to_date]
def process_datetimes(self, string, date_choices):
    self.log('Processing datetimes: %s' % string)
    s = string.replace(u'–', u'-').replace(u'\xa0', ' ').replace(u'\xc2', u' ')
    location = None
    if 'Location' in s:
        times, location = re.findall(
            r'([\w,:;&. ]+)\(Location: ([\w\-,& .]+)\)', s)[0]
        dates = [x for x in date_choices if x is not None]
    elif '(' in s:
        times, dates = re.findall(r'([\w,:;&. ]+)\(([\w\-,& .]+)\)', s)[0]
        dates = self.process_dates(dates, date_choices)
    else:
        times = re.findall(r'([\w,:;&. ]+)', s)[0]
        dates = [x for x in date_choices if x is not None]
    self.log('Processed dates: %s' % dates)

    self.log('Processing times: %s' % times)
    times = re.split('[,;&]', times)
    times = map(lambda x: x.strip(), times)
    times = filter(lambda x: x, times)
    times = [(x.endswith('m.') or x.endswith('m')) and x or (x + ' p.m.')
             for x in times if x]
    times = map(lambda x: du_parse(x).time(), times)
    self.log('Processed times: %s' % times)
    return ((x, times) for x in dates), location
def parse(self, response):
    """Convert Finding search response into a more readable format.

    Parameters
    ----------
    response : list
        a search response

    Returns
    -------
    Cleaned up search results : list

    Examples
    --------
    >>> finding = Finding(country='UK')
    >>> opts = {'keywords': 'Harry Potter'}
    >>> response = finding.search(opts)
    >>> parsed = finding.parse(response)
    >>> set(parsed) == {'message', 'results', 'pages'}
    True
    >>> item = list(parsed['results'].values())[0]
    >>> set(item) == {
    ...     'end_time', 'buy_now_price', 'url', 'currency', 'end_date',
    ...     'shipping', 'buy_now_price_and_shipping', 'title', 'id',
    ...     'condition', 'price_and_shipping', 'item_type', 'price',
    ...     'end_date_time', 'country'}
    True
    >>> 'www.ebay.co.uk' in item['url']
    True
    """
    items = []
    currency = self.global_ids[self.kwargs['country']]['currency']
    result = Andand(response).searchResult.item([])
    pages = Andand(response).paginationOutput.totalPages(0)

    if result and hasattr(result, 'update'):  # one result
        result = [result]

    for r in result:
        date_time = du_parse(r['listingInfo']['endTime'])
        end_date = date_time.strftime("%Y-%m-%d")
        end_time = date_time.strftime("%H:%M")
        # Approximate "seconds since 2010" used only as a sortable key.
        offset_years = int(date_time.strftime("%Y")) - 2010
        year_in_sec = offset_years * 365 * 24 * 60 * 60
        days_in_sec = int(date_time.strftime("%j")) * 24 * 60 * 60
        hours_in_sec = int(date_time.strftime("%H")) * 60 * 60
        minutes_in_sec = int(date_time.strftime("%M")) * 60
        secs_in_sec = int(date_time.strftime("%S"))
        args = [year_in_sec, days_in_sec, hours_in_sec, minutes_in_sec]
        args.append(secs_in_sec)
        end_date_time = sum(args)

        price = float(Andand(r).sellingStatus.currentPrice.value(0))
        buy_now_price = float(Andand(r).listingInfo.buyItNowPrice.value(0))
        shipping = float(
            Andand(r).shippingInfo.shippingServiceCost.value(0))
        condition = Andand(r).condition.conditionDisplayName()
        price_and_shipping = price + shipping
        buy_now_price_and_shipping = buy_now_price + shipping

        item = {
            'id': str(r['itemId']),
            'url': r['viewItemURL'],
            'title': r['title'],
            'condition': condition,
            'item_type': r['listingInfo']['listingType'],
            'price': price,
            'buy_now_price': buy_now_price,
            'shipping': shipping,
            'price_and_shipping': price_and_shipping,
            'buy_now_price_and_shipping': buy_now_price_and_shipping,
            'end_date_time': end_date_time,
            'end_date': end_date,
            'end_time': end_time,
            'country': self.kwargs['country'],
            'currency': currency,
        }
        items.append(item)

    results = {r['id']: r for r in items}
    message = response.get('message')
    return {'results': results, 'pages': pages, 'message': message}
def parse(self, response):
    if not self.done_events_done:
        self.done_events_done = True
        for e in self.done_events:
            if e['date'].date() >= datetime.date.today():
                yield DisneyDay(
                    date=e['date'],
                    park=e['park'],
                    open_time=e['open_time'],
                    close_time=e['close_time'],
                    parades=e['parades'],
                    night_shows=e['night_shows'],
                    events=e['events'],
                    shows=e['shows'],
                    park_atmosphere=e['park_atmosphere']
                )

    self.log(str(response.meta['_splash_processed']['args']['url']) +
             ' - ' + str(len(response.css('.parades'))))
    if len(response.css('.parades')) == 0:
        self.log('Page did not load properly, resending...')
        yield Request(
            response.meta['_splash_processed']['args']['url'],
            self.parse,
            dont_filter=True,
            meta={'splash': response.meta['_splash_processed']})
    else:
        date = du_parse(response.css('.date-second').xpath(
            'text()').extract()[0])
        for park in self.parks:
            park_div = response.css('#%s' % park)
            open_time, close_time = map(
                lambda x: self.process_time(date, x),
                park_div.css('.parkHours').xpath(
                    'p/text()')[1].extract().split('to'))

            schedule_content = {}
            for content_type, content_class in self.schedule_contenttypes:
                content_type_buffer = []
                for content_element in park_div.css(
                        '.eventDetail.%s' % content_class):
                    if content_element.css('.scheduleUnavailableMessage'):
                        continue
                    name = content_element.css('.eventText').xpath(
                        'text()').extract()[0].strip()
                    time_string = content_element.css(
                        '.operatingHoursContainer').xpath(
                        'text()').extract()[0]
                    if 'to' in time_string:
                        times = self.process_time(
                            date, time_string.split('to')[0].strip())
                    else:
                        times = map(
                            lambda x: self.process_time(date, x),
                            time_string.split(','))
                    content_type_buffer.append((name, times))
                schedule_content[content_type] = content_type_buffer

            yield DisneyDay(
                park=park,
                date=date,
                open_time=open_time,
                close_time=close_time,
                **schedule_content)
def _create_tile_record(t, channel_id, locale):
    """Given a tile, create a record for the database queries

    :param t: tile data
    :param channel_id: an integer channel id
    :param locale: locale string
    :return a dictionary
    """
    image_hash = hashlib.sha1(t["imageURI"]).hexdigest()
    enhanced_image_hash = hashlib.sha1(t.get("enhancedImageURI")).hexdigest() \
        if "enhancedImageURI" in t else None

    # deduplicate and sort frecent_sites
    frecent_sites = sorted(set(t.get("frecent_sites", [])))
    if frecent_sites:
        t['frecent_sites'] = frecent_sites

    # deduplicate and sort adgroup_categories
    adgroup_categories = sorted(set(t.get("adgroup_categories", [])))
    if adgroup_categories:
        t['adgroup_categories'] = adgroup_categories

    frequency_caps = t.get("frequency_caps", {"daily": 0, "total": 0})
    adgroup_name = bleach.clean(t.get("adgroup_name", ""), strip=True) or None
    explanation = bleach.clean(t.get("explanation", ""), strip=True) or None

    check_inadjacency = False
    if 'check_inadjacency' in t:
        check_inadjacency = t['check_inadjacency']

    # we have both the string and datetime objects to allow for optional
    # timezones on the client
    time_limits = t.get("time_limits", {
        'start': None,
        'end': None,
        'start_dt': None,
        'end_dt': None
    })
    if time_limits.get('start') or time_limits.get('end'):
        time_limits.update({
            'start_dt': du_parse(time_limits['start'])
            if time_limits.get('start') else None,
            'end_dt': du_parse(time_limits['end'])
            if time_limits.get('end') else None
        })
        for dt_name in ('start_dt', 'end_dt'):
            dt = time_limits[dt_name]
            if dt and dt.tzinfo:
                # capture the datetime as UTC, but without the Timezone info
                # check because input may be TZ-unaware
                time_limits[dt_name] = dt.astimezone(
                    pytz.utc).replace(tzinfo=None)

    return dict(
        target_url=t["url"],
        bg_color=t["bgColor"],
        title_bg_color=t.get("titleBgColor"),
        title=t["title"],
        typ=t["type"],
        image_uri=image_hash,
        enhanced_image_uri=enhanced_image_hash,
        locale=locale,
        frecent_sites=frecent_sites,
        time_limits=time_limits,
        frequency_caps=frequency_caps,
        adgroup_name=adgroup_name,
        adgroup_categories=adgroup_categories,
        explanation=explanation,
        check_inadjacency=check_inadjacency,
        channel_id=channel_id,
    )
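# Hedged sketch of the UTC normalisation used above: an offset-aware datetime
# is converted to UTC and stored naive, so it compares cleanly against other
# naive datetimes. Sample timestamp only.
import pytz
from dateutil.parser import parse as du_parse

aware = du_parse("2014-01-12T00:00:00+05:00")
print(aware.astimezone(pytz.utc).replace(tzinfo=None))  # 2014-01-11 19:00:00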
def devPrepareForThingspeak(self, dev, parms):
    """
    Upload data to Thingspeak and evaluate the result

    This method performs the upload to Thingspeak, evaluates, and logs the
    result.

    -----

    :param dev: the device whose states receive the response values
    :param parms: dict of values to upload
    """
    url = "/update.json"

    response, response_dict = self.sendToThingspeak('post', url, parms)

    # Process the results. Thingspeak will respond with a "0" if something
    # went wrong.
    if response == 0:
        self.logger.warning(u"Something went wrong.")
        self.logger.warning(u"{0}".format(response_dict))
        return False

    if response == 200:
        dev.updateStateOnServer('channel_id', value=int(
            response_dict.get('channel_id', "0")))
        dev.updateStateOnServer('elevation', value=int(
            response_dict.get('elevation', "0")))
        dev.updateStateOnServer('entry_id', value=int(
            response_dict.get('entry_id', "0")))
        dev.updateStateOnServer('latitude', value=float(
            response_dict.get('latitude', "0")))
        dev.updateStateOnServer('longitude', value=float(
            response_dict.get('longitude', "0")))
        dev.updateStateOnServer('status', value=response_dict.get('status', "0"))

        # For thing values 1-8
        for _ in range(1, 9):
            dev.updateStateOnServer('thing{0}'.format(_),
                                    value=response_dict.get(
                                        'field{0}'.format(_), "0"))

        # Convert UTC return to local time. There is an optional timezone
        # parameter that can be used in the form of:
        # time_zone="timezone=America%2FChicago&"
        # For now, we will convert to UTC locally.
        if response_dict['created_at']:
            time = t.time()
            # time_delta_to_utc formula thanks to Karl (kw123).
            time_delta_to_utc = (int(
                t.mktime(dt.datetime.utcfromtimestamp(
                    time + 10).timetuple()) - time) / 100) * 100
            utc_obj = du_parse(response_dict['created_at'])
            local_time = str(utc_obj - dt.timedelta(seconds=time_delta_to_utc))
            dev.updateStateOnServer('created_at', value=local_time)
        else:
            dev.updateStateOnServer('created_at', value=u"Unknown")

    new_props = dev.pluginProps
    new_props['address'] = dev.states['channel_id']
    dev.replacePluginPropsOnServer(new_props)

    dev.updateStateOnServer('thingState', value=True, uiValue=u"OK")
    dev.updateStateImageOnServer(indigo.kStateImageSel.SensorOn)
    return True
def encodeValueDicts(self):
    """
    Encode the data dicts for upload to Thingspeak

    The encodeValueDicts() method is called when a device makes a call to
    upload data to Thingspeak.

    -----
    """
    api_key = None
    thing_dict = {}

    for dev in indigo.devices.itervalues("self"):

        # A device has been created, but hasn't been saved yet.
        if not dev.configured:
            indigo.server.log(
                u"A device is being (or has been) created, but it's not "
                u"fully configured. Sleeping while you finish.")
            continue

        elif not dev.enabled:
            continue

        # Iterate over up to 8 values per device.
        elif dev.enabled:
            # For each device, see if it is time for an update
            last_update = dev.states.get('created_at',
                                         '1970-01-01 00:00:00+00:00')
            if last_update == '':
                last_update = '1970-01-01 00:00:00'
            last_update = du_parse(last_update)

            delta = dt.datetime.now().replace(
                tzinfo=pytz.utc) - last_update.replace(tzinfo=pytz.utc)
            delta = int(delta.total_seconds())

            if self.uploadNow or delta > int(
                    dev.pluginProps['devUploadInterval']):
                dev.updateStateOnServer('thingState', value=False,
                                        uiValue="processing")

                channel_id = dev.pluginProps['channelList']
                url = "/channels.json"
                parms = {'api_key': self.pluginPrefs.get('apiKey', '')}

                response, response_dict = self.sendToThingspeak(
                    'get', url, parms)

                # Find the write api key for this channel (we go and get it
                # in case it's changed.)
                for thing in response_dict:
                    if str(thing['id']) == str(channel_id):
                        for key in thing['api_keys']:
                            if key['write_flag']:
                                api_key = key['api_key']

                if not api_key:
                    return

                for v in range(1, 9):
                    thing_str = 'thing{0}'.format(v)
                    thing_state_str = 'thing{0}State'.format(v)

                    # Create the dict and add the API key to it.
                    thing_dict['key'] = api_key

                    # If there is a device created, but no value assigned.
                    if not dev.pluginProps[thing_str] or \
                            dev.pluginProps[thing_str] == "None":
                        var = "Null value"
                    else:
                        thing_1 = dev.pluginProps[thing_str]
                        state_1 = dev.pluginProps[thing_state_str]

                        self.logger.debug(u"{0:{1}^22}".format('', ' '))
                        self.logger.debug(u"ID: {0}".format(thing_1))
                        self.logger.debug(u"Item: {0}".format(state_1))

                        # If it's a device state, do this:
                        if int(thing_1) in indigo.devices:
                            try:
                                var = indigo.devices[int(
                                    thing_1)].states[state_1]
                                var = self.onlyNumerics(var)
                                self.logger.debug(u"Value: {0}".format(var))
                            except ValueError:
                                self.Fogbert.pluginErrorHandler(
                                    traceback.format_exc())
                                self.logger.warning(
                                    u"{0} - {1} is non-numeric or has been "
                                    u"removed. Will try to upload, but it "
                                    u"won't chart.".format(
                                        dev.name, dev.pluginProps[thing_str]))
                                var = u"undefined"

                            # Add device state value to dictionary.
                            thing_dict['field' + str(v)] = var

                        # If it's a variable value, do this:
                        elif int(thing_1) in indigo.variables:
                            var = indigo.variables[int(thing_1)].value
                            try:
                                var = self.onlyNumerics(var)
                                self.logger.debug(u"Value: {0}".format(var))
                            except ValueError:
                                self.Fogbert.pluginErrorHandler(
                                    traceback.format_exc())
                                self.logger.warning(
                                    u"{0} - {1} is non-numeric or has been "
                                    u"removed. Will try to upload, but it "
                                    u"won't chart.".format(
                                        dev.name, dev.pluginProps[thing_str]))

                            # Add variable value to dictionary.
                            thing_dict['field' + str(v)] = var

                thing_dict['elevation'] = self.pluginPrefs['elevation']
                thing_dict['latitude'] = self.pluginPrefs['latitude']
                thing_dict['longitude'] = self.pluginPrefs['longitude']
                thing_dict['twitter'] = self.pluginPrefs['twitter']
                thing_dict['tweet'] = u"{0}".format(dev.pluginProps['tweet'])

                self.logger.debug(unicode(thing_dict))

                # Open a connection and upload data to Thingspeak
                try:
                    # The plugin uploads variable values before moving on to
                    # the next one. Will continue until no more devices or
                    # the plugin throws an exception.
                    self.logger.debug(
                        u"{0}: Channel updating...".format(dev.name))
                    self.devPrepareForThingspeak(dev, thing_dict)
                except Exception:
                    self.Fogbert.pluginErrorHandler(traceback.format_exc())
                    self.logger.debug("{0} - Curl Return Code: {1}".format(
                        dt.datetime.time(dt.datetime.now()), response))
                    self.logger.debug("{0} - Curl Response Dict: {1}".format(
                        dt.datetime.time(dt.datetime.now()), response_dict))
            else:
                continue

    self.uploadNow = False  # If we've come here manually
    self.updating = False   # If we've come here automatically
    return
def main():
    park_hours = load_pickles('knotts-hours.pickle')
    soak_city_hours = load_pickles('soakcity-hours.pickle')
    show_times = load_pickles('knotts-showtimes.pickle')
    knotts_spreadsheet = petl.fromcsv(
        'https://spreadsheets.google.com/tq?key=%s&gid=0&tqx=out:csv'
        % KNOTTS_SPREADSHEET_KEY
    )
    forecast = get_forecast()

    spreadsheet_lookup = {}
    for item in knotts_spreadsheet.dicts():
        spreadsheet_lookup[du_parse(item['date']).date()] = {
            'crowd_level': item['crowd_level'],
            'closures': [
                x for x in map(unicode.strip, item['closures'].split(','))
                if x]
        }

    # Condense the event listing to one listing per event ignoring location
    condensed_st = {}
    for item in show_times:
        key = (item['date'], item['name'])
        buff = condensed_st.get(key)
        if buff:
            buff['times'] = sorted(buff['times'] + item['times'])
        else:
            buff = item
        condensed_st[key] = buff

    show_times_lookup = {}
    for item in condensed_st.values():
        show_times_lookup[item['date']] = show_times_lookup.get(
            item['date'], []) + [{
                'name': item['name'],
                'times': item['times']}]
        # 'location': item['location']  # We don't use loc, ignore

    soak_city_hours_lookup = {}
    for item in soak_city_hours:
        soak_city_hours_lookup[item['date']] = {
            'open_time': item.get('open_time'),
            'close_time': item.get('close_time')
        }

    new_park_hours = []
    for day in park_hours:
        st = show_times_lookup.get(day['date'])
        if st:
            day['show_times'] = sorted(st, key=lambda x: x['name'])
        ss = soak_city_hours_lookup.get(day['date'])
        if ss:
            day['soak_city_hours'] = ss
        sheet_item = spreadsheet_lookup.get(day['date'])
        if sheet_item:
            day.update(sheet_item)
        forecast_item = forecast.get(day['date'])
        if forecast_item:
            day['temperatureMax'] = forecast_item.temperatureMax
            day['temperatureMin'] = forecast_item.temperatureMin
            day['weatherIcon'] = forecast_item.icon
        new_park_hours.append(day)

    f = StringIO()
    pickle.dump(new_park_hours, f)
    s3_save(f, 'merged_data.pickle')
def main(use_args=None):
    "Wrapper for when we're called from the command line."
    from argparse import ArgumentParser
    import os

    parser = ArgumentParser(
        description="Given a file with lines containing an "
        "event timestamp, optionally a count, and a description of the event, "
        "plot a histogram summary of those events"
    )
    parser.add_argument("eventFile", nargs="+", help="File with input data")
    parser.add_argument(
        "-c",
        "--count",
        action="store_true",
        help="Does the input file also include a count of how many times an "
        "event occurred, immediately after the timestamp?",
    )
    parser.add_argument(
        "-a", "--axis", type=int,
        help="Range, in hours, for each x-axis to span"
    )
    parser.add_argument(
        "-b", "--bucket", type=int,
        help="Width, in minutes, for each histogram bucket"
    )
    parser.add_argument(
        "-r",
        "--rank-by-total",
        action="store_true",
        help="Rank events in the same histogram by the total",
    )
    parser.add_argument(
        "--bicolor",
        action="store_true",
        help="Plot a bicolor graph (where each event is displayed in "
        "alternating dark and light colors)? Works best with --count",
    )
    parser.add_argument(
        "-n",
        "--top-n",
        type=int,
        help="How many of the top events should be showed separately?",
    )
    parser.add_argument(
        "-q",
        "--equal-y-axes",
        action="store_true",
        help="Make the range of every separate y-axis the same?",
    )
    parser.add_argument("-y", "--ylabel", help="Label for each y-axis")
    parser.add_argument("-t", "--title", help="Title for each graph")
    parser.add_argument("-s", "--suptitle", help="Super-title for all graphs")
    parser.add_argument(
        "-o",
        "--output-file",
        default="events.png",
        help='Output filename (default "events.png")',
    )
    opp = parser.parse_args(args=use_args)
    assert not opp.rank_by_total or opp.count, \
        "--rank-by-total is only meaningful with --count"

    self = EventPlotter()
    for fname in opp.eventFile:
        with open(fname) as fhh:
            for lnum, line in enumerate(fhh.readlines()):
                lsp = line.split()
                count = 1
                if opp.count:
                    assert len(lsp) >= 3, \
                        "File {} line {} has less than 3 fields".format(
                            fname, lnum + 1)
                    assert lsp[1].isdigit(), (
                        "File {} line {} second column value '{}' isn't a count"
                    ).format(fname, lnum + 1, lsp[1])
                    count = int(lsp[1])
                    estr = " ".join(lsp[2:])
                else:
                    assert len(lsp) >= 2, \
                        "File {} line {} has less than 2 fields".format(
                            fname, lnum + 1)
                    estr = " ".join(lsp[1:])
                dval = du_parse(lsp[0])
                self.add(dval, estr, count=count)

    fig = self.plot(
        axis=opp.axis,
        bucket=opp.bucket,
        rank_by_total=opp.rank_by_total,
        top_n=opp.top_n,
        bicolor=opp.bicolor,
        equal_y_axes=opp.equal_y_axes,
        ylabel=opp.ylabel,
        title=opp.title,
        suptitle=opp.suptitle,
    )
    ofile = os.path.realpath(os.path.expanduser(opp.output_file))
    jlog("Writing output to {}".format(ofile))
    fig.savefig(ofile)
    return fig
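# Hedged sketch of the input format main() above expects with --count: an
# ISO timestamp, a count, then the event description. The line is made up.
from dateutil.parser import parse as du_parse

line = "2014-01-12T00:00:00 3 cache miss"
lsp = line.split()
print(du_parse(lsp[0]), int(lsp[1]), " ".join(lsp[2:]))
# 2014-01-12 00:00:00 3 cache miss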