def main(): print "So it begins..." target_db_uri = Environment.instance().config.SQLALCHEMY_DATABASE_URI target_connection = psycopg2.connect(target_db_uri) target_cursor = target_connection.cursor() db_uri = Environment.instance().config.SQLALCHEMY_BINDS['stats'] connection = psycopg2.connect(db_uri) cursor = connection.cursor() try: for table_name, columns in TABLES: print "Copying ", table_name # first, we need to lock the source table (ideally) # the problem with this is that we would need to modify permissions for production to allow # the lock for the user configured in SQLALCHEMY_DATABASE_URI # cursor.execute("LOCK TABLE %s" % table_name) # we need to assert the table is empty print "Counting ", table_name target_cursor.execute("select count(*) from %s" % table_name) count, = target_cursor.fetchone() assert count == 0, "Table %s " col_string = ','.join(columns) str_string = ','.join(["%s"] * len(columns)) target_query = StringIO() target_query.write('insert into %s(%s) values ' % (table_name, col_string)) print "Reading ", table_name cursor.execute('select %s from %s' % (col_string, table_name)) for rec in cursor: target_query.write("(%s)," % target_cursor.mogrify(str_string, tuple(rec))) print "Writing ", table_name target_cursor.execute(target_query.getvalue()[:-1]) # now we need to reset the sequence associated with the id for this table target_cursor.execute("select max(id) + 1 from %s" % table_name) nextone, = target_cursor.fetchone() print "Updating sequence for ", table_name target_cursor.execute("SELECT setval('%s_id_seq', %s, false)" % (table_name, nextone)) print "Done ", table_name except Exception as e: print "Error ", e target_connection.rollback() connection.rollback() else: print "Good, well done, excellent." target_connection.commit() connection.commit() finally: connection.close() target_connection.close()
def insert_distribution(url, channel_id, deployed, scheduled_dt, *args, **kwargs):
    from splice.environment import Environment

    # ensure that on insert, a distribution is either deployed or scheduled, not both
    if scheduled_dt is not None:
        deployed = False

    env = Environment.instance()
    conn = env.db.engine.connect()
    trans = conn.begin()
    try:
        conn.execute(
            text(
                "INSERT INTO distributions ("
                " url, channel_id, deployed, scheduled_start_date, created_at"
                ") "
                "VALUES ("
                " :url, :channel_id, :deployed, :scheduled_start_date, :created_at"
                ")"
            ),
            url=url,
            channel_id=channel_id,
            deployed=deployed,
            scheduled_start_date=scheduled_dt,
            created_at=datetime.utcnow()
        )
        trans.commit()
    except:
        trans.rollback()
        raise
def tile_exists(target_url, bg_color, title, type, image_uri, enhanced_image_uri, locale,
                conn=None, *args, **kwargs):
    """
    Return the id of a tile having the data provided
    """
    from splice.environment import Environment
    env = Environment.instance()

    if conn is not None:
        sm = sessionmaker(bind=conn)
        session = sm()
    else:
        session = env.db.session

    # we add order_by in the query although it shouldn't be necessary
    # this is because of a previous bug where duplicate tiles could be created
    results = (
        session
        .query(Tile.id)
        .filter(Tile.target_url == target_url)
        .filter(Tile.bg_color == bg_color)
        .filter(Tile.title == title)
        .filter(Tile.image_uri == image_uri)
        .filter(Tile.enhanced_image_uri == enhanced_image_uri)
        .filter(Tile.locale == locale)
        .order_by(asc(Tile.id))
        .first()
    )

    if results:
        return results[0]

    return results
def get_scheduled_distributions(minutes, dt_query=None):
    """
    Return distributions scheduled from a point in time, within a leniency period
    during which a task could've been scheduled close to that point.
    As a regular task, it is intended to run at least once hourly.

    :minutes: amount of time in the past from the query time which is still viable
    :dt_query: optionally set the date time to find schedules for
    """
    from splice.environment import Environment
    env = Environment.instance()

    if not minutes or not (0 < minutes < 60):
        raise ValueError("minutes needs to be a number between 1..59 inclusive")

    if dt_query is None:
        dt_query = datetime.utcnow()

    # getting around PEP8 E712 warning. This is necessary for SQLAlchemy
    false_value = False

    min_query_dt = dt_query - timedelta(minutes=minutes)
    stmt = (
        env.db.session
        .query(Distribution)
        .filter(Distribution.deployed == false_value)
        .filter(Distribution.scheduled_start_date.between(min_query_dt, dt_query))
    )
    dists = stmt.all()

    return dists
def get_campaigns(account_id=None, past=True, in_flight=True, scheduled=True, utctoday=None):
    from splice.environment import Environment
    env = Environment.instance()

    query = env.db.session.query(Campaign)
    if account_id is not None:
        query = query.filter(Campaign.account_id == account_id)
    if utctoday is None:
        utctoday = datetime.utcnow().date()

    rows = query.order_by(Campaign.id.desc()).all()

    campaigns = []
    for row in rows:
        ret = row_to_dict(row)
        countries = []
        for country in row.countries:
            countries.append(country.country_code)
        ret['countries'] = countries

        # filter based on start and end dates unless an account ID is specified
        if ((past and row.end_date.date() <= utctoday) or
                (in_flight and row.end_date.date() >= utctoday >= row.start_date.date()) or
                (scheduled and row.start_date.date() >= utctoday)):
            campaigns.append(ret)

    return campaigns
def get_upcoming_distributions(limit=100, leniency_minutes=15, include_past=False):
    """
    Obtain distributions, partitioned by channels with up to ``limit`` results for each channel

    :leniency_minutes: have a leniency in minutes up to the present when looking for distributions
    :include_past: always return all past distributions
    """
    from splice.environment import Environment
    env = Environment.instance()

    # getting around PEP8 E712 warning. This is necessary for SQLAlchemy
    false_value = False

    dist_cte = (
        env.db.session
        .query(
            Distribution.id,
            Distribution.channel_id,
            Distribution.url,
            Distribution.created_at,
            Distribution.scheduled_start_date,
            func.row_number().over(
                partition_by=Distribution.channel_id,
                order_by=Distribution.scheduled_start_date.asc())
            .label('row_num')
        )
        .filter(Distribution.deployed == false_value))

    if not include_past:
        min_dt = datetime.utcnow() - timedelta(minutes=leniency_minutes)
        dist_cte = (
            dist_cte
            .filter(Distribution.scheduled_start_date >= min_dt))

    dist_cte = dist_cte.cte()

    stmt = (
        env.db.session
        .query(
            dist_cte.c.id,
            dist_cte.c.channel_id,
            dist_cte.c.url,
            dist_cte.c.created_at,
            dist_cte.c.scheduled_start_date)
        .filter(dist_cte.c.row_num <= limit)
        .order_by(dist_cte.c.scheduled_start_date.asc())
    )
    rows = stmt.all()

    channels = {}
    for row in rows:
        c_dists = channels.setdefault(row.channel_id, [])
        c_dists.append({'id': row.id,
                        'url': row.url,
                        'created_at': row.created_at,
                        'scheduled_at': row.scheduled_start_date})

    return channels
def test_get_all_categories(self):
    """ Test for getting all categories"""
    url = url_for('api.init.init', target="categories")
    response = self.client.get(url)
    assert_equal(response.status_code, 200)
    categories = json.loads(response.data)['results']
    categories_fixture = Environment.instance()._load_categories()
    assert_equal(categories, categories_fixture)
def _update_image(bucket, image_url, tile_id, column='image_uri'):
    env = Environment.instance()
    if image_url and not image_url.startswith('http'):
        imgs = list(bucket.list(prefix="images/%s" % image_url))
        if len(imgs):
            uri = os.path.join('https://%s.s3.amazonaws.com' % env.config.S3['bucket'], imgs[0])
            print "updating %s for tile=%s" % (column, tile_id)
            return "update tiles set %s = '%s' where id = %s" % (column, uri, tile_id)
    return None
def test_get_all_locale(self):
    """ Test for getting all locales"""
    url = url_for('api.init.init', target="locales")
    response = self.client.get(url)
    assert_equal(response.status_code, 200)
    locales = json.loads(response.data)['results']
    locales_fixture = Environment.instance()._load_locales()[:-1]
    locales_fixture.sort()
    assert_equal(locales, locales_fixture)
def get_account(id):
    from splice.environment import Environment
    env = Environment.instance()

    row = (
        env.db.session
        .query(Account).get(id)
    )
    return row_to_dict(row) if row else None
def test_get_all_countries(self):
    """ Test for getting all countries"""
    url = url_for('api.init.init', target="countries")
    response = self.client.get(url)
    assert_equal(response.status_code, 200)
    countries = json.loads(response.data)['results']
    countries_fixture = Environment.instance()._load_countries()[:-1]
    items = [{"country_code": code, "country_name": name} for code, name in countries_fixture]
    assert_equal(countries, items)
def get_accounts():
    from splice.environment import Environment
    env = Environment.instance()

    rows = (
        env.db.session
        .query(Account)
        .order_by(Account.id.desc())
        .all()
    )

    output = [row_to_dict(d) for d in rows]

    return output
def get_channels(limit=100):
    from splice.environment import Environment
    env = Environment.instance()

    rows = (
        env.db.session
        .query(Channel.id, Channel.name, Channel.created_at)
        .order_by(Channel.id.asc())
        .limit(limit)
        .all()
    )

    # ensure items are a list of dicts
    # KeyedTuples may serialize differently on other systems
    output = [d._asdict() for d in rows]

    return output
def get_content(name):
    from splice.environment import Environment
    env = Environment.instance()

    row = env.db.session.query(Content).filter(Content.name == name).first()
    c = row_to_dict(row) if row else None
    if c is not None:
        versions = []
        for version in row.versions:
            versions.append(row_to_dict(version))
        c['versions'] = versions

    return c
def setup_s3(bucket="bucket"): from splice.environment import Environment from boto.s3.cors import CORSConfiguration env = Environment.instance() bucket = env.s3.get_bucket(env.config.S3[bucket]) cors = CORSConfiguration() cors.add_rule("GET", "*", allowed_header="*") bucket.set_cors(cors) headers = { 'Cache-Control': 'public, max-age=31536000', 'Content-Disposition': 'inline', } return bucket, headers
def get_campaign(campaign_id):
    from splice.environment import Environment
    env = Environment.instance()

    row = (env.db.session.query(Campaign).get(campaign_id))
    if row:
        ret = row_to_dict(row)
        countries = []
        for country in row.countries:
            countries.append(country.country_code)
        ret['countries'] = countries
        return ret
    else:
        return None
def tile_exists(target_url, bg_color, title, typ, image_uri, enhanced_image_uri, locale,
                frecent_sites, time_limits, frequency_caps, adgroup_name, explanation,
                check_inadjacency, channel_id, conn=None, *args, **kwargs):
    """
    Return the id of a tile having the data provided
    """
    from splice.environment import Environment
    env = Environment.instance()

    if conn is not None:
        sm = sessionmaker(bind=conn)
        session = sm()
    else:
        session = env.db.session

    # we add order_by in the query although it shouldn't be necessary
    # this is because of a previous bug where duplicate tiles could be created
    results = (
        session
        .query(Tile.id, Tile.adgroup_id)
        .filter(Tile.target_url == target_url)
        .filter(Tile.bg_color == bg_color)
        .filter(Tile.title == title)
        .filter(Tile.type == typ)
        .filter(Tile.image_uri == image_uri)
        .filter(Tile.enhanced_image_uri == enhanced_image_uri)
        .filter(Adgroup.locale == locale)
        .filter(Adgroup.start_date == time_limits.get('start'))
        .filter(Adgroup.end_date == time_limits.get('end'))
        .filter(Adgroup.start_date_dt == time_limits.get('start_dt'))
        .filter(Adgroup.end_date_dt == time_limits.get('end_dt'))
        .filter(Adgroup.frequency_cap_daily == frequency_caps['daily'])
        .filter(Adgroup.frequency_cap_total == frequency_caps['total'])
        .filter(Adgroup.name == adgroup_name)
        .filter(Adgroup.explanation == explanation)
        .filter(Adgroup.check_inadjacency == check_inadjacency)
        .filter(Adgroup.channel_id == channel_id)
        .join(Adgroup.tiles)
        .order_by(asc(Tile.id))
    )

    if results:
        for tile_id, adgroup_id in results:
            # now check frecent sites for this tile
            db_frecents = get_frecent_sites_for_tile(tile_id, conn)
            if db_frecents == sorted(set(frecent_sites)):
                return tile_id, adgroup_id

    return None, None
def get_adgroup(id):
    from splice.environment import Environment
    env = Environment.instance()

    row = (env.db.session.query(Adgroup).get(id))
    if row is None:
        return None

    new = row_to_dict(row)
    categories = []
    for category in row.categories:
        categories.append(category.category)
    new['categories'] = categories

    return new
def test_single_creative_upload_endpoint(self):
    """Test the API endpoint for the single creative upload"""
    from splice.environment import Environment
    env = Environment.instance()

    url = url_for('api.tile.handler_creative_upload')
    with zipfile.ZipFile(self.zip_file, "r") as zf:
        f = zf.getinfo("samples/firefox_mdn_a.png")
        data = {'creative': (StringIO.StringIO(zf.read(f)), 'creative.png')}
        response = self.client.post(url, data=data)
        assert_equal(response.status_code, 200)
        creative_url = json.loads(response.data)['result']
        bucket = env.s3.get_bucket(env.config.S3["bucket"])
        s3_key = os.path.basename(creative_url)
        key = bucket.get_key(s3_key)
        self.assertIsNotNone(key)
def get_contents():
    from splice.environment import Environment
    env = Environment.instance()

    rows = (env.db.session.query(Content).order_by(Content.id.desc()).all())

    output = []
    for d in rows:
        versions = []
        for version in d.versions:
            versions.append(row_to_dict(version))
        c = row_to_dict(d)
        c['versions'] = versions
        output.append(c)

    return output
def get_tiles():
    from splice.environment import Environment
    env = Environment.instance()

    rows = (
        env.db.session
        .query(Tile.id, Tile.adgroup_id, Tile.title, Tile.type, Tile.bg_color, Tile.target_url)
        .order_by(Tile.id.asc())
        .all()
    )

    # ensure items are a list of dicts
    # KeyedTuples may serialize differently on other systems
    output = [d._asdict() for d in rows]

    return output
def get_adgroups():
    from splice.environment import Environment
    env = Environment.instance()

    rows = (
        env.db.session
        .query(Adgroup.id, Adgroup.locale)
        .order_by(Adgroup.id)
        .all()
    )

    # ensure items are a list of dicts
    # KeyedTuples may serialize differently on other systems
    output = [d._asdict() for d in rows]

    return output
def get_adgroups_by_campaign_id(campaign_id):
    from splice.environment import Environment
    env = Environment.instance()

    rows = (env.db.session.query(Adgroup)
            .filter(Adgroup.campaign_id == campaign_id)
            .order_by(Adgroup.id.desc())
            .all())

    output = []
    for d in rows:
        new = row_to_dict(d)
        categories = []
        for category in d.categories:
            categories.append(category.category)
        new['categories'] = categories
        output.append(new)

    return output
def unschedule_distribution(dist_id):
    """
    Remove a distribution id if it is scheduled but not deployed yet
    """
    from splice.environment import Environment
    env = Environment.instance()

    # getting around PEP8 E711 warning. This is necessary for SQLAlchemy
    none_value = None

    stmt = (
        env.db.session
        .query(Distribution)
        .filter(Distribution.id == dist_id)
        .filter(Distribution.scheduled_start_date != none_value)
    )
    dist = stmt.one()
    dist.scheduled_start_date = None
    env.db.session.commit()
def get_distributions(limit=100, *args, **kwargs):
    from splice.environment import Environment
    env = Environment.instance()

    rows = (
        env.db.session
        .query(Distribution.url, Distribution.created_at)
        .order_by(Distribution.id.desc())
        .limit(limit)
        .all()
    )

    # ensure items are lists of lists rather than KeyedTuples
    # KeyedTuples may serialize differently on other systems
    output = [list(d) for d in rows]

    return output
def switch_to_cdn_url(image_uri):
    """Switch the S3 URI with the CDN URI

    We store the S3 URI in the database to allow campaign managers to view the
    uploaded images without suffering from the CDN latency. When preparing to
    generate tiles for Firefox, it's necessary to replace the S3 URIs with the
    CDN ones, as Firefox only allows images hosted on a trusted URI,
    e.g. "tiles.cdn.mozilla.net".

    See https://github.com/oyiptong/splice/issues/203 for more details.
    """
    from splice.environment import Environment
    env = Environment.instance()

    try:
        basename = os.path.basename(image_uri)
    except:
        basename = image_uri  # if the image_uri is a hash string, use it directly

    return os.path.join(env.config.CLOUDFRONT_BASE_URL, "images/%s" % basename)
def get_stats(group_by, filters=None, limit=60):
    """ Get aggregated stats based on a list of group_by fields and filters """
    from splice.environment import Environment
    env = Environment.instance()

    isd = aliased(impression_stats_daily)
    base_table = isd
    local_filters = filters.copy()
    has_cross_db_filters = bool(CROSS_DB_COLUMNS.intersection(filters)) if filters else False
    cross_db_group_by = list(CROSS_DB_COLUMNS.intersection(group_by))

    # Build base table and list of tiles
    if cross_db_group_by:
        base_table = build_subquery_table(env=env,
                                          stats_table=isd,
                                          group_by=group_by,
                                          cross_db_group_by=cross_db_group_by,
                                          filters=filters)
        # No tiles were found, so no stats
        if base_table is None:
            return None
    elif has_cross_db_filters:
        tiles_result = get_tiles(limit_fields=['id'], filters=filters)
        # No tiles were found, so no stats
        if not tiles_result:
            return None
        local_filters['tile_id'] = [t['id'] for t in tiles_result]

    # Build query
    rows = build_base_query(env=env, group_by=group_by, base_table=base_table)
    rows = add_filters(query=rows, base_table=base_table, filters=local_filters)
    rows = rows.order_by(base_table.c[group_by[0]]).limit(limit)
    rows = rows.all()

    return [tuple_to_dict(r) for r in rows] if rows else None
def tile_exists(target_url, bg_color, title, typ, image_uri, enhanced_image_uri, locale,
                frecent_sites, time_limits, channel_id, conn=None, *args, **kwargs):
    """
    Return the id of a tile having the data provided
    """
    from splice.environment import Environment
    env = Environment.instance()

    if conn is not None:
        sm = sessionmaker(bind=conn)
        session = sm()
    else:
        session = env.db.session

    # we add order_by in the query although it shouldn't be necessary
    # this is because of a previous bug where duplicate tiles could be created
    results = (
        session
        .query(Tile.id, Tile.adgroup_id)
        .filter(Tile.target_url == target_url)
        .filter(Tile.bg_color == bg_color)
        .filter(Tile.title == title)
        .filter(Tile.image_uri == image_uri)
        .filter(Tile.enhanced_image_uri == enhanced_image_uri)
        .filter(Adgroup.locale == locale)
        .filter(Adgroup.start_date == time_limits.get('start'))
        .filter(Adgroup.end_date == time_limits.get('end'))
        .filter(Adgroup.start_date_dt == time_limits.get('start_dt'))
        .filter(Adgroup.end_date_dt == time_limits.get('end_dt'))
        .filter(Adgroup.channel_id == channel_id)
        .join(Adgroup.tiles)
        .order_by(asc(Tile.id))
    )

    if results:
        for tile_id, adgroup_id in results:
            # now check frecent sites for this tile
            db_frecents = get_frecent_sites_for_tile(tile_id, conn)
            if db_frecents == sorted(set(frecent_sites)):
                return tile_id, adgroup_id

    return None, None
def get_all_distributions(limit=100):
    """
    Obtain distributions, partitioned by channels with up to ``limit`` results for each channel
    """
    from splice.environment import Environment
    env = Environment.instance()

    dist_cte = (
        env.db.session
        .query(
            Distribution.channel_id,
            Distribution.url,
            Distribution.created_at,
            func.row_number().over(
                partition_by=Distribution.channel_id,
                order_by=Distribution.created_at.desc())
            .label('row_num')
        )
    ).cte()

    stmt = (
        env.db.session
        .query(
            dist_cte.c.channel_id,
            dist_cte.c.url,
            dist_cte.c.created_at)
        .filter(dist_cte.c.row_num <= limit)
        .order_by(dist_cte.c.created_at.desc())
    )
    rows = stmt.all()

    channels = {}
    for row in rows:
        c_dists = channels.setdefault(row.channel_id, [])
        c_dists.append({'url': row.url, 'created_at': row.created_at})

    return channels
def get_tile_ids_by_group(group_by, filters=None):
    from splice.environment import Environment
    env = Environment.instance()

    group_by_field = {
        'category': AdgroupCategory.category,
        'account_id': Account.id,
        'campaign_id': Campaign.id,
        'adgroup_id': Adgroup.id
    }.get(group_by)

    rows = (env.db.session
            .query(group_by_field.label(group_by),
                   func.array_agg(Tile.id).label('tile_ids'))
            .select_from(Tile)
            .group_by(group_by_field))
    rows = add_joins_for_group_by(query=rows, group_by=group_by)
    rows = add_filters(rows, filters, group_by)
    rows = rows.all()

    return [tuple_to_dict(r) for r in rows] if rows else None
def insert_tile(target_url, bg_color, title, type, image_uri, enhanced_image_uri, locale,
                conn=None, *args, **kwargs):
    from splice.environment import Environment
    env = Environment.instance()

    trans = None
    if conn is None:
        conn = env.db.engine.connect()
        trans = conn.begin()

    try:
        conn.execute(
            text(
                "INSERT INTO tiles ("
                " target_url, bg_color, title, type, image_uri, enhanced_image_uri, locale, created_at"
                ") "
                "VALUES ("
                " :target_url, :bg_color, :title, :type, :image_uri, :enhanced_image_uri, :locale, :created_at"
                ")"
            ),
            target_url=target_url,
            bg_color=bg_color,
            title=title,
            type=type,
            image_uri=image_uri,
            enhanced_image_uri=enhanced_image_uri,
            locale=locale,
            created_at=datetime.utcnow()
        )

        result = conn.execute("SELECT MAX(id) FROM tiles;").scalar()

        if trans is not None:
            trans.commit()

        return result
    except:
        if trans is not None:
            trans.rollback()
        raise
def setup_routes(app):
    env = Environment.instance()
    global register_flask_restful

    if "signing" in env.config.ALLOWED_APPS:
        import splice.web.api.content
        splice.web.api.content.register_routes(app)

    if "tiles" in env.config.ALLOWED_APPS:
        import splice.web.views
        splice.web.views.register_routes(app)

        import splice.web.api.heartbeat
        splice.web.api.heartbeat.register_routes(app)

        if not register_flask_restful:
            import splice.web.api.init
            splice.web.api.init.register_routes(app)

            import splice.web.api.account
            splice.web.api.account.register_routes(app)

            import splice.web.api.campaign
            splice.web.api.campaign.register_routes(app)

            import splice.web.api.adgroup
            splice.web.api.adgroup.register_routes(app)

            import splice.web.api.tile
            splice.web.api.tile.register_routes(app)

            import splice.web.api.reporting
            splice.web.api.reporting.register_routes(app)

            import splice.web.api.distribution
            splice.web.api.distribution.register_routes(app)

            register_flask_restful = True
def setUp(self):
    super(TestReporting, self).setUp()

    def values(fd, date_index=0):
        for line in fd:
            row = [el.decode('utf-8') for el in line.split(',')]
            # sqlalchemy doesn't like date strings....
            row[date_index] = datetime.strptime(row[date_index], "%Y-%m-%d")
            yield row

    # load db
    from splice.models import impression_stats_daily, newtab_stats_daily
    conn = Environment.instance().db.engine.connect()

    with open(self.get_fixture_path('impression_stats.csv')) as fd:
        for row in values(fd, 1):
            ins = impression_stats_daily.insert().values(row)
            conn.execute(ins)

    with open(self.get_fixture_path('newtabs.csv')) as fd:
        for row in values(fd):
            ins = newtab_stats_daily.insert().values(row)
            conn.execute(ins)
def insert_distribution(url, *args, **kwargs):
    from splice.environment import Environment
    env = Environment.instance()

    conn = env.db.engine.connect()
    trans = conn.begin()
    try:
        conn.execute(
            text(
                "INSERT INTO distributions ("
                " url, created_at"
                ") "
                "VALUES ("
                " :url, :created_at"
                ")"
            ),
            url=url,
            created_at=datetime.utcnow()
        )
        trans.commit()
    except:
        trans.rollback()
        raise
def get(self, target):
    """Returns the init data including locales, countries, channels etc.

    Params:
        target string, [all|locales|countries|channels|categories]
    """
    target = target.lower()
    if target == "all":
        locales = Environment.instance()._load_locales()[:-1]
        locales.sort()
        countries = Environment.instance()._load_countries()[:-1]
        country_items = [{"country_code": code, "country_name": name} for code, name in countries]
        channels = get_channels()
        categories = Environment.instance()._load_categories()
        data = {
            "countries": country_items,
            "channels": channels,
            "locales": locales,
            "categories": categories,
        }
        return {'result': marshal(data, all_fields)}
    elif target == "locales":
        # the last item is 'ERROR', client won't need this
        locales = Environment.instance()._load_locales()[:-1]
        locales.sort()
        return marshal({"results": locales}, locale_fields)
    elif target == "countries":
        # the last item is 'ERROR', client won't need this
        countries = Environment.instance()._load_countries()[:-1]
        country_items = [{"country_code": code, "country_name": name} for code, name in countries]
        return {'results': marshal(country_items, country_fields)}
    elif target == "channels":
        channels = get_channels()
        return {'results': marshal(channels, channel_fields)}
    elif target == "categories":
        categories = Environment.instance()._load_categories()
        return marshal({"results": categories}, category_fields)
    else:
        return {"message": "Unknown target, must be one of [all|locales|countries|channels|categories]"}, 404
def get_country_code():
    for code, name in Environment.instance()._load_countries():
        yield dict(country_name=name, country_code=code)
def main(): """ Usage: manage.py db upgrade python index_walker.py This script is going to populate Account and Campaign database structures. It does this by reading the currently deployed tile distributions (s3), where it determines the currently active tile set, as well as the geo-targetting data (currently only country level) for each tile/adgroup. The script will discriminate between 'active' and 'inactive' adgroups based on whether or not the adgroup exists in the current distribution. Inactive adgroups are given start/end dates in campaigns that are in the *past*. Active adgroups are placed in campaigns that start on their adgroup creation date and end at some far distant future date. We are using some data structures developed by the Zenko project to build the derive_account_campaign() function in order to identify existing campaigns from our tile data. Campaign objects are considered unique by grouping together the following keys in the adgroup: * the name of the campaign and account returned by derive_account_campaign() * the channel of the adgroup * the 'active' flag (determined as explained above) of the adgroup One campaign row will be assigned for each unique campaign detected. The script will populate the adgroup.campaign_id with the campaign that the adgroup fits into. All writes to the database are transactional. This script is *not* idempotent, and will therefore check that accounts and campaigns tables are empty before running. :return: """ index_files = [ 'https://tiles-resources-prod-tiless3-qbv71djahz3b.s3.amazonaws.com/hello_tile_index_v3.json', 'https://tiles-resources-prod-tiless3-qbv71djahz3b.s3.amazonaws.com/android_tile_index_v3.json', 'https://tiles-resources-prod-tiless3-qbv71djahz3b.s3.amazonaws.com/desktop_tile_index_v3.json', 'https://tiles-resources-prod-tiless3-qbv71djahz3b.s3.amazonaws.com/desktop-prerelease_tile_index_v3.json' ] active_tiles = set() tile_geodes = defaultdict(set) for index in index_files: r = requests.get(index) if 200 <= r.status_code <= 299: data = json.loads(r.text) for geo_locale, dist_dict in data.iteritems(): try: ag = dist_dict.get('ag') if ag: geode = tuple(geo_locale.split('/')) print "processing ", geo_locale ag_r = requests.get(ag) if 200 <= ag_r.status_code <= 299: tiles = json.loads(ag_r.text) directory_tiles = tiles['directory'] suggested_tiles = tiles['suggested'] newts = set(chain((t['directoryId'] for t in directory_tiles), (t['directoryId'] for t in suggested_tiles))) active_tiles.update(newts) for tile_id in newts: tile_geodes[tile_id].add(geode) except: print "skipping ", geo_locale # print "active", str(active_tiles) env = Environment.instance() db_uri = env.config.SQLALCHEMY_DATABASE_URI engine = create_engine(db_uri) connection = engine.connect() try: # assert that campaigns and accounts are empty account_count, = connection.execute("SELECT count(*) FROM accounts").fetchone() assert account_count == 0, "Accounts not empty" campaign_count, = connection.execute("SELECT count(*) FROM campaigns").fetchone() assert campaign_count == 0, "Campaigns not empty" # collate/generate campaign and account data # stmt = select([Adgroup.id, Tile.target_url, Adgroup.channel_id, Adgroup.created_at]). 
\ # where(Tile.adgroup_id == Adgroup.id) stmt = """SELECT a.id, t.target_url, t.title, a.channel_id, a.created_at, c.name, t.id, t.image_uri, t.enhanced_image_uri FROM adgroups a JOIN tiles t on t.adgroup_id = a.id JOIN channels c on a.channel_id = c.id""" result = connection.execute(stmt) campaign_id = 0 account_id = 0 campaigns = dict() adgroups = defaultdict(list) countries = defaultdict(set) accounts = dict() for adgroup_id, url, title, channel, created_at, channel_name, tile_id, i_url, ei_url in result: assert all(x is not None for x in (adgroup_id, url, channel)), \ "Some of %s is None" % str((adgroup_id, url, channel)) # do tld -> account mapping substitution active = adgroup_id in active_tiles account_name, campaign_name = derive_account_campaign(adgroup_id, title, url) curr = (account_name, campaign_name, channel, active) if curr not in campaigns: # this is a new campaign, see if it's active campaign_id += 1 if active: # print "active", curr start_date = created_at.date() end_date = ARBITRARY_FUTURE else: start_date = created_at.date() end_date = created_at.date() # insert it into the right account if account_name not in accounts: account_id += 1 next_account_id = account_id accounts[account_name] = account_id else: next_account_id = accounts[account_name] active_name = '' if active else ' (Closed)' ctuple = (campaign_id, start_date, end_date, "%s %s%s" % (safe_str(campaign_name), channel_name, active_name), False, channel, next_account_id) campaigns[curr] = ctuple # append all the countries for sub_country_code, sub_locale in tile_geodes[adgroup_id]: countries[campaign_id].add(sub_country_code) # this fixes the closed campaigns can't get the correct country code as above if account_name in _campaign_countries: countries[campaign_id] = countries[campaign_id].union(_campaign_countries[account_name]) # print "campaign", ctuple adgroups[campaigns[curr][0]].append(adgroup_id) # insert data into new tables Session = sessionmaker(bind=engine) session = Session() # we need to monkeypatch flask's monkeypatch... 
session._model_changes = None try: # grab all s3 images and reproduce image hash bucket = env.s3.get_bucket(env.config.S3["bucket"]) images = bucket.list('images/') image_hashes = defaultdict(list) enhanced_image_hashes = defaultdict(list) stmt = "SELECT t.id, t.image_uri, t.enhanced_image_uri FROM tiles t" for tile_id, image_uri, enhanced_image_uri in connection.execute(stmt): image_hashes[image_uri].append(tile_id) enhanced_image_hashes[enhanced_image_uri].append(tile_id) for image in images: ext = image.key.split('.')[-1] if ext == 'svg': ext = 'svg+xml' elif ext == 'jpeg': ext = 'jpg' new_hash = hashlib.sha1("data:image/%s;base64,%s" % (ext, base64.b64encode(image.get_contents_as_string()))).hexdigest() new_uri = image.generate_url(expires_in=0, query_auth=False) # remove x-amz-security-token, which is inserted even if query_auth=False # ref: https://github.com/boto/boto/issues/1477 uri = furl(new_uri) try: uri.args.pop('x-amz-security-token') except: pass new_uri = uri.url tile_ids = image_hashes.get(new_hash) if tile_ids: print "image: %s" % image.key session.execute("update tiles set image_uri = '%s' where id in (%s)" % (new_uri, ','.join(str(tid) for tid in tile_ids))) tile_ids = enhanced_image_hashes.get(new_hash) if tile_ids: print "enhanced_image: %s" % image.key session.execute("update tiles set enhanced_image_uri = '%s' where id in (%s)" % (new_uri, ','.join(str(tid) for tid in tile_ids))) account_stmt = insert(Account).values([dict(id=aid, name=aname) for aname, aid in accounts.iteritems()]) session.execute(account_stmt) session.execute("SELECT setval('accounts_id_seq', %s, false)" % (account_id + 1)) target_query = StringIO() target_query.write("""insert into campaigns(id, start_date, end_date, name, paused, channel_id, account_id) values """) pg2_cursor = session.connection().connection.cursor() for campaign_tuple in campaigns.values(): # print "%s %s" % (type(campaign_tuple), campaign_tuple) target_query.write(unicode(pg2_cursor.mogrify("(%s,%s,%s,%s,%s,%s,%s),", campaign_tuple))) session.execute(target_query.getvalue()[:-1]) session.execute("SELECT setval('campaigns_id_seq', %s, false)" % (campaign_id + 1)) cc_stmt = insert(CampaignCountry).values([dict(country_code=cc, campaign_id=cid) for cid, cc_list in countries.iteritems() for cc in cc_list]) session.execute(cc_stmt) adgroup_updates = [update(Adgroup) .where(Adgroup.id.in_(tuple(adgroup_ids))) .values(dict(campaign_id=cid, type="directory", name="adgoup_cpmg_%d" % cid)) for cid, adgroup_ids in adgroups.iteritems()] for adgroup_stmt in adgroup_updates: session.execute(adgroup_stmt) # set the type for the suggested adgroups session.execute("update adgroups set type = 'suggested' where id in (select distinct adgroup_id from adgroup_sites)") session.commit() except Exception as e: print "Error: ", str(e) session.rollback() raise e finally: connection.close() print "done"
import os
import csv

from splice.environment import Environment
from splice.webapp import create_webapp
from flask.ext.testing import TestCase

db_uri = os.environ.get('TEST_DB_URI') or 'postgres://localhost/splice_test'
env = Environment.instance(test=True, test_db_uri=db_uri)


class BaseTestCase(TestCase):

    def __init__(self, methodName='runTest'):
        self.env = env
        super(BaseTestCase, self).__init__(methodName)
        create_webapp(self.env)

    def create_app(self):
        return self.env.application

    def setUp(self):
        self.env.db.drop_all()
        self.create_app()
        self.env.db.create_all()


def tile_values(fd):
    for line in fd:
        row = [el.decode('utf-8') for el in line.split(',')]
        yield dict(
            zip(('target_url', 'bg_color', 'title', 'type', 'image_uri',
                 'enhanced_image_uri', 'adgroup_id', 'locale'), row))
def load(self):
    # Step needed to get around flask's import time side-effects
    from splice.environment import Environment
    env = Environment.instance()
    return env.application
import calendar
from datetime import datetime, timedelta

from nose.tools import assert_equal
from flask import url_for, json
from mock import Mock, PropertyMock

from tests.base import BaseTestCase
from tests.test_scheduling import ScheduleTest
import splice.ingest
from splice.queries import (
    get_scheduled_distributions, get_all_distributions, get_channels)
from splice.environment import Environment

env = Environment.instance()


class TestAuthoring(BaseTestCase):

    def setUp(self):
        self.key_mock = Mock()
        self.key_mock.name = PropertyMock()

        self.bucket_mock = Mock()

        def bucket_get_key_mock(*args, **kwargs):
            return None
        self.bucket_mock.get_key = Mock(side_effect=bucket_get_key_mock)

        def get_key_mock(*args, **kwargs):
            return self.key_mock
        splice.ingest.Key = Mock(side_effect=get_key_mock)
    return True

# copy data to modify inplace, do NOT mutate the original input, cause
# memory is much cheaper than people's mind
data = copy.deepcopy(data)
is_compact = "assets" in data
try:
    jsonschema.validate(data, get_payload_schema(is_compact))
except jsonschema.exceptions.ValidationError, e:
    command_logger.error("ERROR: cannot validate JSON: {0}".format(e.message))
    exc_class, exc, tb = sys.exc_info()
    raise exc_class, exc, tb

from splice.environment import Environment
env = Environment.instance()
conn = env.db.engine.connect()

if is_compact:
    assets, distributions = data["assets"], data["distributions"]
else:
    assets, distributions = None, data

ingested_data = {}
country_locales = sorted(distributions.keys())
try:
    with session_scope(conn) as session:
        if not env.is_test:
            # lock the tables to avoid other concurrent write transactions
            session.execute(
def generate_artifacts(data, channel_name, deploy):
    """Generate locale json files for upload to s3

    :param data: tile data for upload
    :channel_name: distribution channel name
    :deploy: tells whether to deploy to the channels
    """
    artifacts = []
    tile_index = {'__ver__': 3}
    image_index = {}
    env = Environment.instance()

    def image_add(hash, mime_type, image, locale, tile_id, *args, **kwargs):
        """ Add an image to the index and artifact list, return file url """
        if hash not in image_index:
            try:
                file_ext = mime_extensions[mime_type]
            except:
                raise IngestError("Unsupported file type: {0}".format(mime_type))

            s3_key = "images/{0}.{1}.{2}".format(hash, len(image), file_ext)
            url = os.path.join(env.config.CLOUDFRONT_BASE_URL, s3_key)
            image_index[hash] = url
            artifacts.append({"mime": mime_type, "key": s3_key, "data": image})

        return image_index[hash]

    safe_channel_name = urllib.quote(channel_name)

    for country_locale, tile_data in data.iteritems():
        sug_tiles = []
        dir_tiles = []
        country_code, locale = country_locale.split("/")

        # copy data to modify inplace
        tile_data = copy.deepcopy(tile_data)
        for tile in tile_data:
            # image splitting from input
            url = image_add(*slice_image_uri(tile["imageURI"]),
                            locale=locale, tile_id=tile["directoryId"])
            tile["imageURI"] = url
            if 'enhancedImageURI' in tile:
                url = image_add(*slice_image_uri(tile["enhancedImageURI"]),
                                locale=locale, tile_id=tile["directoryId"])
                tile["enhancedImageURI"] = url

            if 'frecent_sites' in tile:
                sug_tiles.append(tile)
            else:
                dir_tiles.append(tile)

        # deploy both v2 and v3 versions
        if deploy:
            # v2
            legacy_tiles = copy.deepcopy(dir_tiles)
            for tile in legacy_tiles:
                # remove extra metadata
                for key in ('frequency_caps', 'adgroup_name', 'adgroup_categories',
                            'explanation', 'check_inadjacency', 'time_limits'):
                    tile.pop(key, None)

            legacy = json.dumps({locale: legacy_tiles}, sort_keys=True)
            legacy_hsh = hashlib.sha1(legacy).hexdigest()
            legacy_key = "{0}/{1}.{2}.json".format(safe_channel_name, country_locale, legacy_hsh)
            artifacts.append({
                "key": legacy_key,
                "data": legacy,
            })

            # v3
            ag = json.dumps({'suggested': sug_tiles, 'directory': dir_tiles}, sort_keys=True)
            ag_hsh = hashlib.sha1(ag).hexdigest()
            ag_key = "{0}/{1}.{2}.ag.json".format(safe_channel_name, country_locale, ag_hsh)
            artifacts.append({
                "key": ag_key,
                "data": ag,
            })

            tile_index[country_locale] = {
                'legacy': os.path.join(env.config.CLOUDFRONT_BASE_URL, legacy_key),
                'ag': os.path.join(env.config.CLOUDFRONT_BASE_URL, ag_key),
            }

    if deploy:
        # include tile index if deployment is requested. 'ver' allows us to make onyx
        # backward compatible more easily
        artifacts.append({
            "key": "{0}_{1}".format(safe_channel_name, env.config.S3["tile_index_key"]),
            "data": json.dumps(tile_index, sort_keys=True),
            "force_upload": True,
        })

    # include data submission in artifacts
    data_serialized = json.dumps(compress_payload(data), sort_keys=True)
    hsh = hashlib.sha1(data_serialized).hexdigest()
    dt_str = datetime.utcnow().isoformat().replace(":", "-")
    artifacts.append({
        "key": os.path.join("/distributions", safe_channel_name,
                            "{0}.{1}.json".format(hsh, dt_str)),
        "data": data_serialized,
        "dist": True
    })

    return artifacts
from splice.environment import Environment env = Environment.instance("integration_tests.prod_settings.DefaultConfig") # env = Environment.instance() from splice.queries import tile_stats_weekly, slot_stats_weekly, tile_stats_monthly, slot_stats_monthly, \ tile_summary_weekly, slot_summary_weekly, tile_summary_monthly, slot_summary_monthly, \ tile_stats_daily, tile_summary_daily with env.application.app_context(): # TODO: check results conn = env.db.engine.connect() print "\ntile_summary_weekly" key, rval = tile_summary_weekly(conn, '2014-05-15') for x in rval: print x print "\ntile_summary_daily" _, rval = tile_summary_daily(conn, '2014-05-15') for year, week, tile_id, title, imps, clicks, pinned, blocked, spon, spon_link in rval: print year, week, tile_id, title, imps, clicks, pinned, blocked, spon, spon_link print "\ntile_stats_weekly - tile_id = 2" _, rval = tile_stats_weekly(conn, '2014-05-15', '2') for year, week, tile_id, title, country, locale, imps, clicks, pinned, blocked, spon, spon_link in rval: print year, week, tile_id, title, country, locale, imps, clicks, pinned, blocked, spon, spon_link print "\ntile_stats_weekly - tile_id = 2, country_code = US" _, rval = tile_stats_weekly(conn, '2014-05-15', '2', 'US') for year, week, tile_id, title, country, locale, imps, clicks, pinned, blocked, spon, spon_link in rval: print year, week, tile_id, title, country, locale, imps, clicks, pinned, blocked, spon, spon_link
def create_webapp(*args, **kwargs):
    env = Environment.instance(*args, **kwargs)
    setup_routes(env.application)
    return env.application
def populate_countries(table):
    countries = Environment.instance()._load_countries()
    op.bulk_insert(
        table,
        [{"country_code": code, "country_name": name} for code, name in countries]
    )
def insert_tile(target_url, bg_color, title, typ, image_uri, enhanced_image_uri, locale,
                frecent_sites, time_limits, frequency_caps, adgroup_name, explanation,
                check_inadjacency, channel_id, conn=None, *args, **kwargs):
    from splice.environment import Environment
    env = Environment.instance()
    now = datetime.utcnow()

    trans = None
    if conn is None:
        conn = env.db.engine.connect()
        trans = conn.begin()

    try:
        conn.execute(
            text(
                "INSERT INTO adgroups ("
                "locale, "
                "start_date, "
                "end_date, "
                "start_date_dt, "
                "end_date_dt, "
                "name, "
                "explanation, "
                "frequency_cap_daily, "
                "frequency_cap_total, "
                "check_inadjacency, "
                "channel_id, "
                "created_at"
                ") "
                "VALUES ("
                ":locale, "
                ":start_date, "
                ":end_date, "
                ":start_date_dt, "
                ":end_date_dt, "
                ":adgroup_name, "
                ":explanation, "
                ":frequency_cap_daily, "
                ":frequency_cap_total, "
                ":check_inadjacency, "
                ":channel_id, "
                ":created_at"
                ")"
            ),
            locale=locale,
            start_date=time_limits.get('start'),
            end_date=time_limits.get('end'),
            start_date_dt=time_limits.get('start_dt'),
            end_date_dt=time_limits.get('end_dt'),
            adgroup_name=adgroup_name,
            explanation=explanation,
            frequency_cap_daily=frequency_caps['daily'],
            frequency_cap_total=frequency_caps['total'],
            check_inadjacency=check_inadjacency,
            channel_id=channel_id,
            created_at=now,
        )

        ag_id = conn.execute("SELECT MAX(id) FROM adgroups;").scalar()

        if frecent_sites:
            values = ','.join(["(%d, '%s', '%s')" % (ag_id, site, now) for site in frecent_sites])
            stmt = "INSERT INTO adgroup_sites (adgroup_id, site, created_at) VALUES %s" % values
            conn.execute(stmt)

        conn.execute(
            text(
                "INSERT INTO tiles ("
                " target_url, bg_color, title, type, image_uri, enhanced_image_uri, created_at, locale, adgroup_id"
                ") "
                "VALUES ("
                " :target_url, :bg_color, :title, :type, :image_uri, :enhanced_image_uri, :created_at, :locale, :adgroup_id"
                ")"
            ),
            target_url=target_url,
            bg_color=bg_color,
            title=title,
            type=typ,
            image_uri=image_uri,
            enhanced_image_uri=enhanced_image_uri,
            created_at=now,
            locale=locale,
            adgroup_id=ag_id
        )

        tile_id = conn.execute("SELECT MAX(id) FROM tiles;").scalar()

        if trans is not None:
            trans.commit()

        return tile_id, ag_id
    except Exception as e:
        if trans is not None:
            trans.rollback()
        raise e
def main():
    # get argument
    parser = OptionParser(
        usage='Usage: %prog [<CDN_URL>]'
        '\n\nArguments:'
        '\n CDN_URL Of the format "<scheme>://<fqdn>".'
        ' Trailing "/" not allowed.'
        '\n\nExamples:'
        '\n %prog https://tiles.cdn.mozilla.net'
    )
    parser.set_defaults(
        quiet=False,
        verbose=False,
    )
    parser.add_option(
        '-q', '--quiet',
        action='store_true',
        dest='quiet',
        help="Don't report NOTICE",
    )
    parser.add_option(
        '-v', '--verbose',
        action='store_true',
        dest='verbose',
        help='Report SUCCESS',
    )
    options, args = parser.parse_args()

    try:
        from splice.environment import Environment
        config = Environment.instance().config
        cdn = 'https://%s.s3.amazonaws.com' % config.S3['bucket']
        tile_index_key = config.S3['tile_index_key']
    except Exception:
        cdn = 'https://tiles.cdn.mozilla.net'
        tile_index_key = 'tile_index_v3.json'

    channels = [
        'desktop',
        'android',
        'desktop-prerelease',
        'hello'
    ]

    if len(args) == 1:
        cdn = args.pop()
    elif len(args) > 1:
        parser.parse_args(['-h'])

    if not options.quiet:
        print(
            'NOTICE: crawling: %s/%s_%s' % (cdn, tuple(channels), tile_index_key)
        )
        print('NOTICE: calculating tiles urls')

    errors = []

    # extract tiles urls from tile index
    try:
        urls = [
            tiles_url
            for index in validate(
                grequests.imap(
                    (grequests.get('%s/%s_%s' % (cdn, channel, tile_index_key),
                                   allow_redirects=False,)
                     for channel in channels),
                    size=10
                ),
                options.verbose,
                errors,
            )
            for key, value in index.json().iteritems()
            if '/' in key
            for tiles_url in value.values()
        ]
        tiles_urls = set()
        for url in urls:
            if type(url) is list:
                tiles_urls.update(url)
            else:
                tiles_urls.add(url)

        if not options.quiet:
            print('NOTICE: tiles urls extracted: %s' % len(tiles_urls))
            print('NOTICE: calculating image urls')

        # extract image urls from tiles
        image_urls = set([
            image_url
            for tiles in validate(
                grequests.imap(
                    (grequests.get(tiles_url, allow_redirects=False)
                     for tiles_url in tiles_urls),
                    size=10
                ),
                options.verbose,
                errors,
            )
            for value_x in tiles.json().values()
            for value_y in value_x
            for key, image_url in value_y.iteritems()
            if key in ['imageURI', 'enhancedImageURI']
        ])

        if not options.quiet:
            print('NOTICE: image urls extracted: %s' % len(image_urls))
            print('NOTICE: validating image urls')

        # Two things to notice here:
        # 1. expanding the list comprehension is necessary to get the 'validate'
        #    step above to actually evaluate (it's lazy.)
        # 2. the actual value of the list comprehension is dropped, not returned.
        [
            valid.url
            for valid in validate(
                grequests.imap(
                    (grequests.head(image_url, allow_redirects=False)
                     for image_url in image_urls),
                    size=10
                ),
                options.verbose,
                errors,
            )
        ]
    except Exception as e:
        msg = 'ERROR: %s' % e
        print(msg)
        print(traceback.format_exc())
        errors.append(msg)

    if errors:
        exit(1)
from datetime import datetime

from sqlalchemy import text

from splice.environment import Environment

db = Environment.instance().db
metadata = db.metadata


class Channel(db.Model):
    __tablename__ = "channels"

    id = db.Column(db.Integer(), autoincrement=True, primary_key=True, info={"identity": [1, 1]})
    name = db.Column(db.String(32), nullable=False, unique=True)
    created_at = db.Column(db.DateTime(), default=datetime.utcnow)


class Distribution(db.Model):
    __tablename__ = "distributions"

    id = db.Column(db.Integer(), autoincrement=True, primary_key=True, info={"identity": [1, 1]})
    url = db.Column(db.Text(), nullable=False)
    channel_id = db.Column(db.Integer(), db.ForeignKey('channels.id'), nullable=False)
    deployed = db.Column(db.Boolean(), default=False)
def distribute(data, channel_id, deploy, scheduled_dt=None):
    """Upload tile data to S3

    :data: tile data
    :channel_id: channel id for which to distribute tile data
    :deploy: whether to deploy tiles to firefox immediately
    :scheduled_dt: an optional scheduled date in the future for deploy. overrides deploy
    """
    command_logger.info("Generating Data")

    from splice.models import Channel
    from splice.environment import Environment
    env = Environment.instance()

    if scheduled_dt:
        now = datetime.utcnow()
        if now > scheduled_dt:
            raise ScheduleError("scheduled date needs to be in the future")
        elif deploy:
            raise ScheduleError("cannot specify deploy and schedule at the same time")

    channel = (env.db.session.query(Channel)
               .filter(Channel.id == channel_id)
               .one())

    artifacts = generate_artifacts(data, channel.name, deploy)

    command_logger.info("Uploading to S3 for channel {0}".format(channel.name))

    bucket = Environment.instance().s3.get_bucket(
        Environment.instance().config.S3["bucket"])
    cors = CORSConfiguration()
    cors.add_rule("GET", "*", allowed_header="*")
    bucket.set_cors(cors)

    distributed = []
    headers = {
        'Cache-Control': 'public, max-age=31536000',
        'Content-Disposition': 'inline',
    }

    # upload individual files
    for file in artifacts:
        if "mime" in file:
            headers['Content-Type'] = file["mime"]
        else:
            # default to JSON for artifacts
            headers['Content-Type'] = "application/json"

        key = bucket.get_key(file["key"])
        uploaded = False
        if key is None or file.get("force_upload"):
            key = Key(bucket)
            key.name = file["key"]
            key.set_contents_from_string(file["data"], headers=headers)
            key.set_acl("public-read")
            uploaded = True

        url = key.generate_url(expires_in=0, query_auth=False)
        # remove x-amz-security-token, which is inserted even if query_auth=False
        # ref: https://github.com/boto/boto/issues/1477
        uri = furl(url)
        try:
            uri.args.pop('x-amz-security-token')
        except:
            pass
        url = uri.url

        if uploaded:
            command_logger.info("UPLOADED {0}".format(url))
        else:
            command_logger.info("SKIPPED {0}".format(url))

        distributed.append([url, uploaded])

        if file.get("dist", False):
            insert_distribution(url, channel_id, deploy, scheduled_dt)

    return distributed
def get_possible_distributions(today=None, channel_id=None):
    """Generate all possible distributions for a given date and channel.

    The result tiles are grouped by (country, locale, channel_id); a tile index
    file is also generated as the last item of the result. Note that all tiles
    in a distribution will be ordered by the created timestamp, descending.

    Params:
        today: date, the target date on which to produce the distributions.
            The default is None, which means use the current date.
        channel_id: int, the target channel_id. Will produce distributions for
            all the channels if not specified.

    Returns:
        A distribution dictionary of (channel, distribution_list) type, where
        channel is the name of the channel, and distribution_list consists of
        all distributions for that channel. For example:
        {
            "desktop": [
                {
                    "key": "desktop/US/en-US/some_hash_0.json",
                    "data": {"distribution_payload"}
                },
                {
                    "key": "desktop/CA/en-US/some_hash_1.json",
                    "data": {"distribution_payload"}
                },
                ...,
                {
                    "key": "desktop/CA/en-GB/some_hash_2.json",
                    "data": {"distribution_payload"}
                },
                {
                    "key": "desktop_tile_index.json",
                    "data": {"tile_index_payload"},
                    "force_upload": True
                }
            ]
        }
    """
    # TODO([email protected]): Clean up suggested tiles
    from splice.environment import Environment
    env = Environment.instance()

    if today is None:
        today = datetime.utcnow().date()

    query = (env.db.session.query(Tile)
             .filter(Tile.paused == false())
             .filter(Adgroup.paused == false())
             .filter(Campaign.paused == false())
             .filter(Campaign.end_date >= today)
             .filter(Campaign.start_date <= today)
             .join(Adgroup).join(Campaign).join(CampaignCountry)
             .order_by(desc(Tile.created_at)))
    if channel_id is not None:
        query = query.filter(Campaign.channel_id == channel_id)
    rows = query.all()

    bucketer = load_bucketer()
    artifacts = defaultdict(list)
    tiles = {}
    for tile in rows:
        locale = tile.adgroup.locale
        countries = tile.adgroup.campaign.countries
        channel = tile.adgroup.channel.name
        safe_channel_name = urllib.quote(channel)
        new_tiles = _create_tiles(tile, bucketer)
        legacy_tiles = _create_tiles(tile, bucketer, True)
        suggested = tile.adgroup.type == "suggested" and len(tile.adgroup.categories) > 0
        for country in countries:
            key = (safe_channel_name, country.country_code, locale)
            value = tiles.setdefault(key, Dists(legacy=[], directory=[], suggested=[]))
            if suggested:
                value.suggested.extend(new_tiles)
            else:
                value.directory.extend(new_tiles)
                value.legacy.extend(legacy_tiles)

    tile_index = {}
    for (channel, country, locale), (legacy, directory, _suggested) in tiles.items():
        country_locale = "%s/%s" % (country, locale)
        legacy_keys, ag_keys = [], []

        # v2
        for legacy_tiles in multiplex_directory_tiles(legacy):
            legacy_json = json.dumps({'locale': legacy_tiles}, sort_keys=True)
            legacy_hsh = hashlib.sha1(legacy_json).hexdigest()
            legacy_key = "{0}/{1}.{2}.json".format(channel, country_locale, legacy_hsh)
            legacy_keys.append(legacy_key)
            artifacts[channel].append({"key": legacy_key, "data": legacy_json})

        # v3
        for ag_tiles in multiplex_directory_tiles(directory):
            ag = json.dumps({'suggested': [], 'directory': ag_tiles}, sort_keys=True)
            ag_hsh = hashlib.sha1(ag).hexdigest()
            ag_key = "{0}/{1}.{2}.ag.json".format(channel, country_locale, ag_hsh)
            ag_keys.append(ag_key)
            artifacts[channel].append({
                "key": ag_key,
                "data": ag,
            })

        tile_index_channel = tile_index.setdefault(channel, {'__ver__': 3})
        all_legacy_keys = [os.path.join(env.config.CLOUDFRONT_BASE_URL, k) for k in legacy_keys]
        all_ag_keys = [os.path.join(env.config.CLOUDFRONT_BASE_URL, k) for k in ag_keys]
        tile_index_channel[country_locale] = {
            'legacy': all_legacy_keys,
            'ag': all_ag_keys
        }

    # the index files
    for channel, tile_index_channel in tile_index.items():
        artifacts[channel].append({
            "key": "{0}_{1}".format(channel, env.config.S3["tile_index_key"]),
            "data": json.dumps(tile_index_channel, sort_keys=True),
            "force_upload": True
        })

    return artifacts