Example #1
def main():
    print "So it begins..."
    target_db_uri = Environment.instance().config.SQLALCHEMY_DATABASE_URI
    target_connection = psycopg2.connect(target_db_uri)
    target_cursor = target_connection.cursor()

    db_uri = Environment.instance().config.SQLALCHEMY_BINDS['stats']
    connection = psycopg2.connect(db_uri)
    cursor = connection.cursor()

    try:
        for table_name, columns in TABLES:
            print "Copying ", table_name

            # first, we need to lock the source table (ideally)
            # the problem with this is that we would need to modify permissions for production to allow
            # the lock for the user configured in SQLALCHEMY_DATABASE_URI
            # cursor.execute("LOCK TABLE %s" % table_name)

            # we need to assert the table is empty
            print "Counting ", table_name
            target_cursor.execute("select count(*) from %s" % table_name)
            count, = target_cursor.fetchone()
            assert count == 0, "Table %s is not empty" % table_name

            col_string = ','.join(columns)
            str_string = ','.join(["%s"] * len(columns))
            target_query = StringIO()
            target_query.write('insert into %s(%s) values ' % (table_name, col_string))
            print "Reading ", table_name
            cursor.execute('select %s from %s' % (col_string, table_name))
            for rec in cursor:
                target_query.write("(%s)," % target_cursor.mogrify(str_string, tuple(rec)))
            print "Writing ", table_name
            target_cursor.execute(target_query.getvalue()[:-1])

            # now we need to reset the sequence associated with the id for this table
            target_cursor.execute("select max(id) + 1 from %s" % table_name)
            nextone, = target_cursor.fetchone()
            print "Updating sequence for ", table_name
            target_cursor.execute("SELECT setval('%s_id_seq', %s, false)" % (table_name, nextone))
            print "Done ", table_name

    except Exception as e:
        print "Error ", e
        target_connection.rollback()
        connection.rollback()
    else:
        print "Good, well done, excellent."
        target_connection.commit()
        connection.commit()
    finally:
        connection.close()
        target_connection.close()
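
The mogrify-based bulk insert above renders every row into one large multi-row INSERT on the client. A minimal sketch of the pattern in isolation, assuming a reachable PostgreSQL database (table and rows are illustrative):

import psycopg2

conn = psycopg2.connect("dbname=example")
cur = conn.cursor()
rows = [(1, 'a'), (2, 'b')]
# mogrify renders each tuple into a safely quoted fragment such as "(1, 'a')"
values = ','.join(cur.mogrify("(%s,%s)", row) for row in rows)
cur.execute("INSERT INTO demo (id, label) VALUES " + values)
conn.commit()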
Example #2
def insert_distribution(url, channel_id, deployed, scheduled_dt, *args, **kwargs):
    from splice.environment import Environment

    # ensure that on insert, a distribution is either deployed or scheduled, not both
    if scheduled_dt is not None:
        deployed = False

    env = Environment.instance()
    conn = env.db.engine.connect()
    trans = conn.begin()
    try:
        conn.execute(
            text(
                "INSERT INTO distributions ("
                " url, channel_id, deployed, scheduled_start_date, created_at"
                ") "
                "VALUES ("
                " :url, :channel_id, :deployed, :scheduled_start_date, :created_at"
                ")"
            ),
            url=url,
            channel_id=channel_id,
            deployed=deployed,
            scheduled_start_date=scheduled_dt,
            created_at=datetime.utcnow()
        )
        trans.commit()
    except:
        trans.rollback()
        raise
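
A minimal sketch of the same explicit begin/commit/rollback pattern in isolation, assuming SQLAlchemy 1.x (the engine URL and table are illustrative):

from sqlalchemy import create_engine, text

engine = create_engine("postgresql://localhost/example")
conn = engine.connect()
trans = conn.begin()
try:
    conn.execute(text("INSERT INTO t (a) VALUES (:a)"), a=1)
    trans.commit()
except:
    trans.rollback()
    raise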
Example #3
def tile_exists(target_url, bg_color, title, type, image_uri, enhanced_image_uri, locale, conn=None, *args, **kwargs):
    """
    Return the id of a tile matching the provided data
    """
    from splice.environment import Environment
    env = Environment.instance()

    if conn is not None:
        sm = sessionmaker(bind=conn)
        session = sm()
    else:
        session = env.db.session

    # we add order_by in the query although it shouldn't be necessary
    # this is because of a previous bug where duplicate tiles could be created
    results = (
        session
        .query(Tile.id)
        .filter(Tile.target_url == target_url)
        .filter(Tile.bg_color == bg_color)
        .filter(Tile.title == title)
        .filter(Tile.image_uri == image_uri)
        .filter(Tile.enhanced_image_uri == enhanced_image_uri)
        .filter(Tile.locale == locale)
        .order_by(asc(Tile.id))
        .first()
    )

    if results:
        return results[0]

    return results
Example #4
def get_scheduled_distributions(minutes, dt_query=None):
    """
    Return distributions scheduled around a point in time, allowing a leniency
    period within which a task could have been scheduled close to that point.
    As a regular task, it is intended to run at least once hourly.
    :minutes: how far in the past from the query time a schedule is still viable
    :dt_query: optionally set the date time to find schedules for
    """
    from splice.environment import Environment

    env = Environment.instance()

    if not minutes or not (0 < minutes < 60):
        raise ValueError("minutes needs to be a number between 1..59 inclusive")

    if dt_query is None:
        dt_query = datetime.utcnow()

    # getting around PEP8 E712 warning. This is necessary for SQLAlchemy
    false_value = False

    min_query_dt = dt_query - timedelta(minutes=minutes)

    stmt = (
        env.db.session
        .query(Distribution)
        .filter(Distribution.deployed == false_value)
        .filter(Distribution.scheduled_start_date.between(min_query_dt, dt_query))
    )

    dists = stmt.all()

    return dists
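
A hedged usage sketch (the timestamp is illustrative): this asks for distributions scheduled within the 30 minutes leading up to noon UTC on 2015-06-01.

from datetime import datetime

dists = get_scheduled_distributions(30, datetime(2015, 6, 1, 12, 0))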
Example #5
def get_campaigns(account_id=None, past=True, in_flight=True, scheduled=True, utctoday=None):
    from splice.environment import Environment

    env = Environment.instance()

    query = env.db.session.query(Campaign)

    if account_id is not None:
        query = query.filter(Campaign.account_id == account_id)

    if utctoday is None:
        utctoday = datetime.utcnow().date()

    rows = query.order_by(Campaign.id.desc()).all()

    campaigns = []
    for row in rows:
        ret = row_to_dict(row)
        countries = []
        for country in row.countries:
            countries.append(country.country_code)
        ret['countries'] = countries

        # keep campaigns whose start and end dates match the requested past/in-flight/scheduled windows
        if ((past and row.end_date.date() <= utctoday) or
                (in_flight and row.end_date.date() >= utctoday >= row.start_date.date()) or
                (scheduled and row.start_date.date() >= utctoday)):
            campaigns.append(ret)

    return campaigns
Example #6
def get_upcoming_distributions(limit=100, leniency_minutes=15, include_past=False):
    """
    Obtain distributions, partitioned by channel, with up to ``limit`` results
    per channel
    :leniency_minutes: grace period, in minutes, before the present when looking for distributions
    :include_past: include past distributions as well
    """
    from splice.environment import Environment

    env = Environment.instance()

    # getting around PEP8 E712 warning. This is necessary for SQLAlchemy
    false_value = False

    dist_cte = (
        env.db.session
        .query(
            Distribution.id,
            Distribution.channel_id,
            Distribution.url,
            Distribution.created_at,
            Distribution.scheduled_start_date,
            func.row_number().over(
                partition_by=Distribution.channel_id,
                order_by=Distribution.scheduled_start_date.asc())
            .label('row_num')
        )
        .filter(Distribution.deployed == false_value))

    if not include_past:
        min_dt = datetime.utcnow() - timedelta(minutes=leniency_minutes)
        dist_cte = (
            dist_cte
            .filter(Distribution.scheduled_start_date >= min_dt))

    dist_cte = dist_cte.cte()

    stmt = (
        env.db.session
        .query(
            dist_cte.c.id,
            dist_cte.c.channel_id,
            dist_cte.c.url,
            dist_cte.c.created_at,
            dist_cte.c.scheduled_start_date)
        .filter(dist_cte.c.row_num <= limit)
        .order_by(dist_cte.c.scheduled_start_date.asc())
    )

    rows = stmt.all()

    channels = {}

    for row in rows:
        c_dists = channels.setdefault(row.channel_id, [])
        c_dists.append({'id': row.id,
                        'url': row.url,
                        'created_at': row.created_at,
                        'scheduled_at': row.scheduled_start_date})

    return channels
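
The row_number() window function caps results per channel; a usage sketch, assuming the models and session are configured as above, that walks the per-channel partitions the function returns:

channels = get_upcoming_distributions(limit=10)
for channel_id, dists in channels.iteritems():
    for d in dists:
        print channel_id, d['scheduled_at'], d['url']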
Example #7
    def test_get_all_categories(self):
        """ Test for getting all categories"""
        url = url_for('api.init.init', target="categories")
        response = self.client.get(url)
        assert_equal(response.status_code, 200)
        categories = json.loads(response.data)['results']

        categories_fixture = Environment.instance()._load_categories()
        assert_equal(categories, categories_fixture)
Example #8
def _update_image(bucket, image_url, tile_id, column='image_uri'):
    env = Environment.instance()
    if image_url and not image_url.startswith('http'):
        imgs = list(bucket.list(prefix="images/%s" % image_url))
        if len(imgs):
            # imgs[0] is a boto Key object; join on its name (the S3 key path)
            uri = os.path.join('https://%s.s3.amazonaws.com' % env.config.S3['bucket'], imgs[0].name)
            print "updating %s for tile=%s" % (column, tile_id)
            return "update tiles set %s = '%s' where id = %s" % (column, uri, tile_id)
    return None
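
A hedged usage sketch (bucket, image hash, and tile id are illustrative); the helper returns an UPDATE statement, or None when no matching S3 key is found:

sql = _update_image(bucket, 'abc123.png', 42)
# e.g. "update tiles set image_uri = 'https://<bucket>.s3.amazonaws.com/images/abc123.png' where id = 42"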
Example #9
    def test_get_all_locale(self):
        """ Test for getting all locales"""
        url = url_for('api.init.init', target="locales")
        response = self.client.get(url)
        assert_equal(response.status_code, 200)
        locales = json.loads(response.data)['results']

        locales_fixture = Environment.instance()._load_locales()[:-1]
        locales_fixture.sort()
        assert_equal(locales, locales_fixture)
Example #10
def get_account(id):
    from splice.environment import Environment

    env = Environment.instance()

    row = (
        env.db.session
        .query(Account).get(id)
    )
    return row_to_dict(row) if row else None
Example #11
    def test_get_all_countries(self):
        """ Test for getting all countries"""
        url = url_for('api.init.init', target="countries")
        response = self.client.get(url)
        assert_equal(response.status_code, 200)
        countries = json.loads(response.data)['results']

        countries_fixture = Environment.instance()._load_countries()[:-1]
        items = [{"country_code": code, "country_name": name} for code, name in countries_fixture]
        assert_equal(countries, items)
Example #12
def get_accounts():
    from splice.environment import Environment

    env = Environment.instance()

    rows = (
        env.db.session
        .query(Account)
        .order_by(Account.id.desc())
        .all()
    )
    output = [row_to_dict(d) for d in rows]

    return output
Example #13
def get_channels(limit=100):
    from splice.environment import Environment

    env = Environment.instance()

    rows = (
        env.db.session
        .query(Channel.id, Channel.name, Channel.created_at)
        .order_by(Channel.id.asc())
        .limit(limit)
        .all()
    )

    # ensure items are a list of dicts
    # KeyedTuples may serialize differently on other systems
    output = [d._asdict() for d in rows]

    return output
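
The _asdict() call is what turns each SQLAlchemy KeyedTuple row into a plain dict; a minimal illustration (values are made up):

row = rows[0]      # KeyedTuple, behaves like (1, u'desktop', datetime(2015, 1, 1))
row.name           # attribute access: u'desktop'
row._asdict()      # {'id': 1, 'name': u'desktop', 'created_at': datetime(2015, 1, 1)}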
Example #14
def get_content(name):
    from splice.environment import Environment

    env = Environment.instance()

    row = env.db.session.query(Content).filter(Content.name == name).first()
    c = row_to_dict(row) if row else None
    if c is not None:
        versions = []
        for version in row.versions:
            versions.append(row_to_dict(version))
        c['versions'] = versions

    return c
Example #15
def setup_s3(bucket="bucket"):
    from splice.environment import Environment
    from boto.s3.cors import CORSConfiguration

    env = Environment.instance()
    bucket = env.s3.get_bucket(env.config.S3[bucket])
    cors = CORSConfiguration()
    cors.add_rule("GET", "*", allowed_header="*")
    bucket.set_cors(cors)
    headers = {
        'Cache-Control': 'public, max-age=31536000',
        'Content-Disposition': 'inline',
    }
    return bucket, headers
Example #16
def get_campaign(campaign_id):
    from splice.environment import Environment

    env = Environment.instance()

    row = (env.db.session.query(Campaign).get(campaign_id))
    if row:
        ret = row_to_dict(row)
        countries = []
        for country in row.countries:
            countries.append(country.country_code)
        ret['countries'] = countries
        return ret
    else:
        return None
Example #17
def tile_exists(target_url, bg_color, title, typ, image_uri, enhanced_image_uri, locale,
                frecent_sites, time_limits, frequency_caps, adgroup_name, explanation, check_inadjacency, channel_id, conn=None, *args, **kwargs):
    """
    Return the ids (tile id, adgroup id) of a tile matching the provided data
    """
    from splice.environment import Environment
    env = Environment.instance()

    if conn is not None:
        sm = sessionmaker(bind=conn)
        session = sm()
    else:
        session = env.db.session

    # we add order_by in the query although it shouldn't be necessary
    # this is because of a previous bug where duplicate tiles could be created
    results = (
        session
        .query(Tile.id, Tile.adgroup_id)
        .filter(Tile.target_url == target_url)
        .filter(Tile.bg_color == bg_color)
        .filter(Tile.title == title)
        .filter(Tile.type == typ)
        .filter(Tile.image_uri == image_uri)
        .filter(Tile.enhanced_image_uri == enhanced_image_uri)
        .filter(Adgroup.locale == locale)
        .filter(Adgroup.start_date == time_limits.get('start'))
        .filter(Adgroup.end_date == time_limits.get('end'))
        .filter(Adgroup.start_date_dt == time_limits.get('start_dt'))
        .filter(Adgroup.end_date_dt == time_limits.get('end_dt'))
        .filter(Adgroup.frequency_cap_daily == frequency_caps['daily'])
        .filter(Adgroup.frequency_cap_total == frequency_caps['total'])
        .filter(Adgroup.name == adgroup_name)
        .filter(Adgroup.explanation == explanation)
        .filter(Adgroup.check_inadjacency == check_inadjacency)
        .filter(Adgroup.channel_id == channel_id)
        .join(Adgroup.tiles)
        .order_by(asc(Tile.id))
    )

    if results:
        for tile_id, adgroup_id in results:
            # now check frecent sites for this tile
            db_frecents = get_frecent_sites_for_tile(tile_id, conn)
            if db_frecents == sorted(set(frecent_sites)):
                return tile_id, adgroup_id

    return None, None
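
The frecent-site comparison above relies on a canonical form, sorted with duplicates removed; illustratively:

sorted(set(['b.com', 'a.com', 'a.com']))  # ['a.com', 'b.com']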
Example #18
def get_adgroup(id):
    from splice.environment import Environment

    env = Environment.instance()

    row = (env.db.session.query(Adgroup).get(id))
    if row is None:
        return None

    new = row_to_dict(row)
    categories = []
    for category in row.categories:
        categories.append(category.category)
    new['categories'] = categories

    return new
Example #19
    def test_single_creative_upload_endpoint(self):
        """Test the API endpoint for the single creative upload"""
        from splice.environment import Environment

        env = Environment.instance()
        url = url_for('api.tile.handler_creative_upload')
        with zipfile.ZipFile(self.zip_file, "r") as zf:
            f = zf.getinfo("samples/firefox_mdn_a.png")
            data = {'creative': (StringIO.StringIO(zf.read(f)), 'creative.png')}
            response = self.client.post(url, data=data)
            assert_equal(response.status_code, 200)
            creative_url = json.loads(response.data)['result']
            bucket = env.s3.get_bucket(env.config.S3["bucket"])
            s3_key = os.path.basename(creative_url)
            key = bucket.get_key(s3_key)
            self.assertIsNotNone(key)
Example #20
def get_contents():
    from splice.environment import Environment

    env = Environment.instance()

    rows = (env.db.session.query(Content).order_by(Content.id.desc()).all())

    output = []
    for d in rows:
        versions = []
        for version in d.versions:
            versions.append(row_to_dict(version))
        c = row_to_dict(d)
        c['versions'] = versions
        output.append(c)

    return output
Example #21
def get_tiles():
    from splice.environment import Environment

    env = Environment.instance()

    rows = (
        env.db.session
        .query(Tile.id, Tile.adgroup_id, Tile.title, Tile.type, Tile.bg_color, Tile.target_url)
        .order_by(Tile.id.asc())
        .all()
    )

    # ensure items are a list of dicts
    # KeyedTuples may serialize differently on other systems
    output = [d._asdict() for d in rows]

    return output
Example #22
def get_adgroups():
    from splice.environment import Environment

    env = Environment.instance()

    rows = (
        env.db.session
        .query(Adgroup.id, Adgroup.locale)
        .order_by(Adgroup.id)
        .all()
    )

    # ensure items are a list of dicts
    # KeyedTuples may serialize differently on other systems
    output = [d._asdict() for d in rows]

    return output
Example #23
def get_channels(limit=100):
    from splice.environment import Environment

    env = Environment.instance()

    rows = (
        env.db.session
        .query(Channel.id, Channel.name, Channel.created_at)
        .order_by(Channel.id.asc())
        .limit(limit)
        .all()
    )

    # ensure items are a list of dicts
    # KeyedTuples may serialize differently on other systems
    output = [d._asdict() for d in rows]

    return output
Example #24
def get_adgroups_by_campaign_id(campaign_id):
    from splice.environment import Environment

    env = Environment.instance()

    rows = (
        env.db.session
        .query(Adgroup)
        .filter(Adgroup.campaign_id == campaign_id)
        .order_by(Adgroup.id.desc())
        .all()
    )

    output = []
    for d in rows:
        new = row_to_dict(d)
        categories = []
        for category in d.categories:
            categories.append(category.category)
        new['categories'] = categories
        output.append(new)

    return output
Example #25
def get_campaign(campaign_id):
    from splice.environment import Environment

    env = Environment.instance()

    row = (
        env.db.session
        .query(Campaign).get(campaign_id)
    )
    if row:
        ret = row_to_dict(row)
        countries = []
        for country in row.countries:
            countries.append(country.country_code)
        ret['countries'] = countries
        return ret
    else:
        return None
Example #26
def unschedule_distribution(dist_id):
    """
    Remove a distribution id if it is scheduled but not deployed yet
    """
    from splice.environment import Environment

    env = Environment.instance()

    # getting around PEP8 E711 warning. This is necessary for SQLAlchemy
    none_value = None

    stmt = (
        env.db.session
        .query(Distribution)
        .filter(Distribution.id == dist_id)
        .filter(Distribution.scheduled_start_date != none_value)
    )

    dist = stmt.one()
    dist.scheduled_start_date = None
    env.db.session.commit()
Example #27
    def test_single_creative_upload_endpoint(self):
        """Test the API endpoint for the single creative upload"""
        from splice.environment import Environment

        env = Environment.instance()
        url = url_for('api.tile.handler_creative_upload')
        with zipfile.ZipFile(self.zip_file, "r") as zf:
            f = zf.getinfo("samples/firefox_mdn_a.png")
            data = {
                'creative': (StringIO.StringIO(zf.read(f)), 'creative.png')
            }
            response = self.client.post(url, data=data)
            assert_equal(response.status_code, 200)
            creative_url = json.loads(response.data)['result']
            bucket = env.s3.get_bucket(env.config.S3["bucket"])
            s3_key = os.path.basename(creative_url)
            key = bucket.get_key(s3_key)
            self.assertIsNotNone(key)
Example #28
def get_distributions(limit=100, *args, **kwargs):
    from splice.environment import Environment

    env = Environment.instance()

    rows = (
        env.db.session
        .query(Distribution.url, Distribution.created_at)
        .order_by(Distribution.id.desc())
        .limit(limit)
        .all()
    )

    # ensure items are lists of lists rather than KeyedTuples
    # KeyedTuples may serialize differently on other systems
    output = [list(d) for d in rows]

    return output
Example #29
def get_adgroup(id):
    from splice.environment import Environment

    env = Environment.instance()

    row = (
        env.db.session
        .query(Adgroup).get(id)
    )
    if row is None:
        return None

    new = row_to_dict(row)
    categories = []
    for category in row.categories:
        categories.append(category.category)
    new['categories'] = categories

    return new
Example #30
def switch_to_cdn_url(image_uri):
    """Switch the S3 URI with the CDN URI

    We store the S3 URI in the database to allow campaign managers to view the
    uploaded images without suffering from the CDN latency. When preparing to
    generate tiles for the Firefox, it's necessary to replace the S3 URIs with
    the CDN ones, as Firefox only allows images hosted on a trusted URI, e.g.
    "tiles.cdn.mozilla.net".

    See https://github.com/oyiptong/splice/issues/203 for more details.
    """
    from splice.environment import Environment

    env = Environment.instance()
    try:
        basename = os.path.basename(image_uri)
    except:
        basename = image_uri  # if the image_uri is a hash string, use it directly
    return os.path.join(env.config.CLOUDFRONT_BASE_URL, "images/%s" % basename)
Example #31
def switch_to_cdn_url(image_uri):
    """Switch the S3 URI with the CDN URI

    We store the S3 URI in the database to allow campaign managers to view the
    uploaded images without suffering from the CDN latency. When preparing to
    generate tiles for the Firefox, it's necessary to replace the S3 URIs with
    the CDN ones, as Firefox only allows images hosted on a trusted URI, e.g.
    "tiles.cdn.mozilla.net".

    See https://github.com/oyiptong/splice/issues/203 for more details.
    """
    from splice.environment import Environment

    env = Environment.instance()
    try:
        basename = os.path.basename(image_uri)
    except:
        basename = image_uri  # if the image_uri is a hash string, use it directly
    return os.path.join(env.config.CLOUDFRONT_BASE_URL, "images/%s" % basename)
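
A hedged usage sketch, assuming CLOUDFRONT_BASE_URL is "https://tiles.cdn.mozilla.net" (bucket name and hash are illustrative):

switch_to_cdn_url('https://my-bucket.s3.amazonaws.com/images/abc123.png')
# -> 'https://tiles.cdn.mozilla.net/images/abc123.png'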
Example #32
def get_stats(group_by, filters=None, limit=60):
    """
    Get aggregated stats based on a list of group_by fields and filters
    """
    from splice.environment import Environment
    env = Environment.instance()

    isd = aliased(impression_stats_daily)
    base_table = isd
    local_filters = filters.copy() if filters else {}

    has_cross_db_filters = bool(
        CROSS_DB_COLUMNS.intersection(filters)) if filters else False
    cross_db_group_by = list(CROSS_DB_COLUMNS.intersection(group_by))

    # Build base table and list of tiles
    if cross_db_group_by:
        base_table = build_subquery_table(env=env,
                                          stats_table=isd,
                                          group_by=group_by,
                                          cross_db_group_by=cross_db_group_by,
                                          filters=filters)
        # No tiles were found, so no stats
        if base_table is None:
            return None

    elif has_cross_db_filters:
        tiles_result = get_tiles(limit_fields=['id'], filters=filters)
        # No tiles were found, so no stats
        if not tiles_result:
            return None
        local_filters['tile_id'] = [t['id'] for t in tiles_result]

    # Build query
    rows = build_base_query(env=env, group_by=group_by, base_table=base_table)
    rows = add_filters(query=rows,
                       base_table=base_table,
                       filters=local_filters)
    rows = rows.order_by(base_table.c[group_by[0]]).limit(limit)
    rows = rows.all()

    return [tuple_to_dict(r) for r in rows] if rows else None
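
A hedged usage sketch; the group_by list and filter keys are illustrative and must match what build_base_query and add_filters expect:

stats = get_stats(['campaign_id'], filters={'account_id': [1]}, limit=10)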
Example #33
def tile_exists(target_url, bg_color, title, typ, image_uri, enhanced_image_uri, locale,
                frecent_sites, time_limits, channel_id, conn=None, *args, **kwargs):
    """
    Return the ids (tile id, adgroup id) of a tile matching the provided data
    """
    from splice.environment import Environment
    env = Environment.instance()

    if conn is not None:
        sm = sessionmaker(bind=conn)
        session = sm()
    else:
        session = env.db.session

    # we add order_by in the query although it shouldn't be necessary
    # this is because of a previous bug where duplicate tiles could be created
    results = (
        session
        .query(Tile.id, Tile.adgroup_id)
        .filter(Tile.target_url == target_url)
        .filter(Tile.bg_color == bg_color)
        .filter(Tile.title == title)
        .filter(Tile.image_uri == image_uri)
        .filter(Tile.enhanced_image_uri == enhanced_image_uri)
        .filter(Adgroup.locale == locale)
        .filter(Adgroup.start_date == time_limits.get('start'))
        .filter(Adgroup.end_date == time_limits.get('end'))
        .filter(Adgroup.start_date_dt == time_limits.get('start_dt'))
        .filter(Adgroup.end_date_dt == time_limits.get('end_dt'))
        .filter(Adgroup.channel_id == channel_id)
        .join(Adgroup.tiles)
        .order_by(asc(Tile.id))
    )

    if results:
        for tile_id, adgroup_id in results:
            # now check frecent sites for this tile
            db_frecents = get_frecent_sites_for_tile(tile_id, conn)
            if db_frecents == sorted(set(frecent_sites)):
                return tile_id, adgroup_id

    return None, None
Example #34
def get_all_distributions(limit=100):
    """
    Obtain distributions, partitioned by channel, with up to ``limit`` results
    per channel
    """
    from splice.environment import Environment

    env = Environment.instance()

    dist_cte = (
        env.db.session
        .query(
            Distribution.channel_id,
            Distribution.url,
            Distribution.created_at,
            func.row_number().over(
                partition_by=Distribution.channel_id,
                order_by=Distribution.created_at.desc())
            .label('row_num')
        )
    ).cte()

    stmt = (
        env.db.session
        .query(
            dist_cte.c.channel_id,
            dist_cte.c.url,
            dist_cte.c.created_at)
        .filter(dist_cte.c.row_num <= limit)
        .order_by(dist_cte.c.created_at.desc())
    )

    rows = stmt.all()

    channels = {}

    for row in rows:
        c_dists = channels.setdefault(row.channel_id, [])
        c_dists.append({'url': row.url, 'created_at': row.created_at})

    return channels
Example #35
def get_all_distributions(limit=100):
    """
    Obtain distributions, partitioned by channel, with up to ``limit`` results
    per channel
    """
    from splice.environment import Environment

    env = Environment.instance()

    dist_cte = (
        env.db.session
        .query(
            Distribution.channel_id,
            Distribution.url,
            Distribution.created_at,
            func.row_number().over(
                partition_by=Distribution.channel_id,
                order_by=Distribution.created_at.desc())
            .label('row_num')
        )
    ).cte()

    stmt = (
        env.db.session
        .query(
            dist_cte.c.channel_id,
            dist_cte.c.url,
            dist_cte.c.created_at)
        .filter(dist_cte.c.row_num <= limit)
        .order_by(dist_cte.c.created_at.desc())
    )

    rows = stmt.all()

    channels = {}

    for row in rows:
        c_dists = channels.setdefault(row.channel_id, [])
        c_dists.append({'url': row.url, 'created_at': row.created_at})

    return channels
Example #36
def get_tile_ids_by_group(group_by, filters=None):
    from splice.environment import Environment
    env = Environment.instance()

    group_by_field = {
        'category': AdgroupCategory.category,
        'account_id': Account.id,
        'campaign_id': Campaign.id,
        'adgroup_id': Adgroup.id
    }.get(group_by)

    rows = (
        env.db.session
        .query(group_by_field.label(group_by),
               func.array_agg(Tile.id).label('tile_ids'))
        .select_from(Tile)
        .group_by(group_by_field)
    )

    rows = add_joins_for_group_by(query=rows, group_by=group_by)
    rows = add_filters(rows, filters, group_by)
    rows = rows.all()

    return [tuple_to_dict(r) for r in rows] if rows else None
Example #37
def unschedule_distribution(dist_id):
    """
    Remove a distribution id if it is scheduled but not deployed yet
    """
    from splice.environment import Environment

    env = Environment.instance()

    # getting around PEP8 E711 warning. This is necessary for SQLAlchemy
    none_value = None

    stmt = (
        env.db.session
        .query(Distribution)
        .filter(Distribution.id == dist_id)
        .filter(Distribution.scheduled_start_date != none_value)
    )

    dist = stmt.one()
    dist.scheduled_start_date = None
    env.db.session.commit()
Example #38
def insert_tile(target_url, bg_color, title, type, image_uri, enhanced_image_uri, locale, conn=None, *args, **kwargs):

    from splice.environment import Environment
    env = Environment.instance()

    trans = None
    if conn is None:
        conn = env.db.engine.connect()
        trans = conn.begin()

    try:
        conn.execute(
            text(
                "INSERT INTO tiles ("
                " target_url, bg_color, title, type, image_uri, enhanced_image_uri, locale, created_at"
                ") "
                "VALUES ("
                " :target_url, :bg_color, :title, :type, :image_uri, :enhanced_image_uri, :locale, :created_at"
                ")"
            ),
            target_url=target_url,
            bg_color=bg_color,
            title=title,
            type=type,
            image_uri=image_uri,
            enhanced_image_uri=enhanced_image_uri,
            locale=locale,
            created_at=datetime.utcnow()
        )

        result = conn.execute("SELECT MAX(id) FROM tiles;").scalar()
        if trans is not None:
            trans.commit()
        return result
    except:
        if trans is not None:
            trans.rollback()
        raise
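
Reading the new id back with SELECT MAX(id) assumes no concurrent inserts; on PostgreSQL the same effect is commonly achieved atomically with RETURNING. A hedged sketch of that alternative (not the code above), reusing the same conn and text imports:

result = conn.execute(
    text("INSERT INTO tiles (title, created_at) VALUES (:t, :c) RETURNING id"),
    t=title, c=datetime.utcnow()
).scalar()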
Example #39
def get_contents():
    from splice.environment import Environment

    env = Environment.instance()

    rows = (
        env.db.session
        .query(Content)
        .order_by(Content.id.desc())
        .all()
    )

    output = []
    for d in rows:
        versions = []
        for version in d.versions:
            versions.append(row_to_dict(version))
        c = row_to_dict(d)
        c['versions'] = versions
        output.append(c)

    return output
Example #40
def setup_routes(app):
    env = Environment.instance()
    global register_flask_restful

    if "signing" in env.config.ALLOWED_APPS:
        import splice.web.api.content
        splice.web.api.content.register_routes(app)

    if "tiles" in env.config.ALLOWED_APPS:
        import splice.web.views
        splice.web.views.register_routes(app)

        import splice.web.api.heartbeat
        splice.web.api.heartbeat.register_routes(app)

        if not register_flask_restful:
            import splice.web.api.init
            splice.web.api.init.register_routes(app)

            import splice.web.api.account
            splice.web.api.account.register_routes(app)

            import splice.web.api.campaign
            splice.web.api.campaign.register_routes(app)

            import splice.web.api.adgroup
            splice.web.api.adgroup.register_routes(app)

            import splice.web.api.tile
            splice.web.api.tile.register_routes(app)

            import splice.web.api.reporting
            splice.web.api.reporting.register_routes(app)

            import splice.web.api.distribution
            splice.web.api.distribution.register_routes(app)

            register_flask_restful = True
Example #41
def setup_routes(app):
    env = Environment.instance()
    global register_flask_restful

    if "signing" in env.config.ALLOWED_APPS:
        import splice.web.api.content
        splice.web.api.content.register_routes(app)

    if "tiles" in env.config.ALLOWED_APPS:
        import splice.web.views
        splice.web.views.register_routes(app)

        import splice.web.api.heartbeat
        splice.web.api.heartbeat.register_routes(app)

        if not register_flask_restful:
            import splice.web.api.init
            splice.web.api.init.register_routes(app)

            import splice.web.api.account
            splice.web.api.account.register_routes(app)

            import splice.web.api.campaign
            splice.web.api.campaign.register_routes(app)

            import splice.web.api.adgroup
            splice.web.api.adgroup.register_routes(app)

            import splice.web.api.tile
            splice.web.api.tile.register_routes(app)

            import splice.web.api.reporting
            splice.web.api.reporting.register_routes(app)

            import splice.web.api.distribution
            splice.web.api.distribution.register_routes(app)

            register_flask_restful = True
Example #42
    def setUp(self):
        super(TestReporting, self).setUp()

        def values(fd, date_index=0):
            for line in fd:
                row = [el.decode('utf-8') for el in line.split(',')]
                # SQLAlchemy wants datetime objects rather than raw date strings
                row[date_index] = datetime.strptime(row[date_index], "%Y-%m-%d")
                yield row

        # load db
        from splice.models import impression_stats_daily, newtab_stats_daily
        conn = Environment.instance().db.engine.connect()

        with open(self.get_fixture_path('impression_stats.csv')) as fd:
            for row in values(fd, 1):
                ins = impression_stats_daily.insert().values(row)
                conn.execute(ins)

        with open(self.get_fixture_path('newtabs.csv')) as fd:
            for row in values(fd):
                ins = newtab_stats_daily.insert().values(row)
                conn.execute(ins)
Example #43
def insert_distribution(url, *args, **kwargs):
    from splice.environment import Environment

    env = Environment.instance()
    conn = env.db.engine.connect()
    trans = conn.begin()
    try:
        conn.execute(
            text(
                "INSERT INTO distributions ("
                " url, created_at"
                ") "
                "VALUES ("
                " :url, :created_at"
                ")"
            ),
            url=url,
            created_at=datetime.utcnow()
        )
        trans.commit()
    except:
        trans.rollback()
        raise
Example #44
def get_adgroups_by_campaign_id(campaign_id):
    from splice.environment import Environment

    env = Environment.instance()

    rows = (
        env.db.session
        .query(Adgroup)
        .filter(Adgroup.campaign_id == campaign_id)
        .order_by(Adgroup.id.desc())
        .all()
    )

    output = []
    for d in rows:
        new = row_to_dict(d)
        categories = []
        for category in d.categories:
            categories.append(category.category)
        new['categories'] = categories
        output.append(new)

    return output
Example #45
    def get(self, target):
        """Returns the init data including locales, countries, channels etc.

        Params: target string, [all|locales|countries|channels|categories]
        """
        target = target.lower()
        if target == "all":
            locales = Environment.instance()._load_locales()[:-1]
            locales.sort()
            countries = Environment.instance()._load_countries()[:-1]
            country_items = [{"country_code": code, "country_name": name} for code, name in countries]
            channels = get_channels()
            categories = Environment.instance()._load_categories()
            data = {
                "countries": country_items,
                "channels": channels,
                "locales": locales,
                "categories": categories,
            }
            return {'result': marshal(data, all_fields)}
        elif target == "locales":
            # the last item is 'ERROR', client won't need this
            locales = Environment.instance()._load_locales()[:-1]
            locales.sort()
            return marshal({"results": locales}, locale_fields)
        elif target == "countries":
            # the last item is 'ERROR', client won't need this
            countries = Environment.instance()._load_countries()[:-1]
            country_items = [{"country_code": code, "country_name": name} for code, name in countries]
            return {'results': marshal(country_items, country_fields)}
        elif target == "channels":
            channels = get_channels()
            return {'results': marshal(channels, channel_fields)}
        elif target == "categories":
            categories = Environment.instance()._load_categories()
            return marshal({"results": categories}, category_fields)
        else:
            return {"message": "Unknown target, must be one of [all|locales|countries|channels]"}, 404
Example #46
def get_country_code():
    for code, name in Environment.instance()._load_countries():
        yield dict(country_name=name, country_code=code)
Example #47
def main():
    """
    Usage:
    manage.py db upgrade
    python index_walker.py

    This script populates the Account and Campaign database structures.  It does this
    by reading the currently deployed tile distributions (S3), from which it determines the currently
    active tile set, as well as the geo-targeting data (currently only country level) for each tile/adgroup.

    The script will discriminate between 'active' and 'inactive' adgroups based on whether or not
    the adgroup exists in the current distribution.  Inactive adgroups are given start/end dates
    in campaigns that are in the *past*.  Active adgroups are placed in campaigns that start on their
    adgroup creation date and end at some far distant future date.

    We are using some data structures developed by the Zenko project to build the derive_account_campaign()
    function in order to identify existing campaigns from our tile data.

    Campaign objects are considered unique by grouping together the following keys in the adgroup:
    * the name of the campaign and account returned by derive_account_campaign()
    * the channel of the adgroup
    * the 'active' flag (determined as explained above) of the adgroup

    One campaign row will be assigned for each unique campaign detected.

    The script will populate the adgroup.campaign_id with the campaign that the adgroup fits into.

    All writes to the database are transactional.

    This script is *not* idempotent, and will therefore check that the accounts and campaigns tables are empty before running.

    :return:
    """
    index_files = [
        'https://tiles-resources-prod-tiless3-qbv71djahz3b.s3.amazonaws.com/hello_tile_index_v3.json',
        'https://tiles-resources-prod-tiless3-qbv71djahz3b.s3.amazonaws.com/android_tile_index_v3.json',
        'https://tiles-resources-prod-tiless3-qbv71djahz3b.s3.amazonaws.com/desktop_tile_index_v3.json',
        'https://tiles-resources-prod-tiless3-qbv71djahz3b.s3.amazonaws.com/desktop-prerelease_tile_index_v3.json'
    ]
    active_tiles = set()
    tile_geodes = defaultdict(set)

    for index in index_files:
        r = requests.get(index)
        if 200 <= r.status_code <= 299:
            data = json.loads(r.text)

            for geo_locale, dist_dict in data.iteritems():
                try:
                    ag = dist_dict.get('ag')
                    if ag:
                        geode = tuple(geo_locale.split('/'))
                        print "processing ", geo_locale
                        ag_r = requests.get(ag)
                        if 200 <= ag_r.status_code <= 299:
                            tiles = json.loads(ag_r.text)
                            directory_tiles = tiles['directory']
                            suggested_tiles = tiles['suggested']
                            newts = set(chain((t['directoryId'] for t in directory_tiles),
                                        (t['directoryId'] for t in suggested_tiles)))
                            active_tiles.update(newts)
                            for tile_id in newts:
                                tile_geodes[tile_id].add(geode)
                except:
                    print "skipping ", geo_locale

    # print "active", str(active_tiles)

    env = Environment.instance()

    db_uri = env.config.SQLALCHEMY_DATABASE_URI
    engine = create_engine(db_uri)
    connection = engine.connect()

    try:
        # assert that campaigns and accounts are empty
        account_count, = connection.execute("SELECT count(*) FROM accounts").fetchone()
        assert account_count == 0, "Accounts not empty"
        campaign_count, = connection.execute("SELECT count(*) FROM campaigns").fetchone()
        assert campaign_count == 0, "Campaigns not empty"

        # collate/generate campaign and account data
        # stmt = select([Adgroup.id, Tile.target_url, Adgroup.channel_id, Adgroup.created_at]). \
        #     where(Tile.adgroup_id == Adgroup.id)
        stmt = """SELECT a.id, t.target_url, t.title, a.channel_id, a.created_at, c.name,
                    t.id, t.image_uri, t.enhanced_image_uri
                  FROM adgroups a
                  JOIN tiles t on t.adgroup_id = a.id
                  JOIN channels c on a.channel_id = c.id"""
        result = connection.execute(stmt)

        campaign_id = 0
        account_id = 0
        campaigns = dict()
        adgroups = defaultdict(list)
        countries = defaultdict(set)
        accounts = dict()

        for adgroup_id, url, title, channel, created_at, channel_name, tile_id, i_url, ei_url in result:
            assert all(x is not None for x in (adgroup_id, url, channel)), \
                "Some of %s is None" % str((adgroup_id, url, channel))

            # do tld -> account mapping substitution
            active = adgroup_id in active_tiles
            account_name, campaign_name = derive_account_campaign(adgroup_id, title, url)
            curr = (account_name, campaign_name, channel, active)
            if curr not in campaigns:
                # this is a new campaign, see if it's active
                campaign_id += 1
                if active:
                    # print "active", curr
                    start_date = created_at.date()
                    end_date = ARBITRARY_FUTURE
                else:
                    start_date = created_at.date()
                    end_date = created_at.date()

                # insert it into the right account
                if account_name not in accounts:
                    account_id += 1
                    next_account_id = account_id
                    accounts[account_name] = account_id
                else:
                    next_account_id = accounts[account_name]

                active_name = '' if active else ' (Closed)'
                ctuple = (campaign_id, start_date, end_date,
                          "%s %s%s" % (safe_str(campaign_name), channel_name, active_name),
                          False, channel, next_account_id)
                campaigns[curr] = ctuple

                # append all the countries
                for sub_country_code, sub_locale in tile_geodes[adgroup_id]:
                    countries[campaign_id].add(sub_country_code)
                # this fixes closed campaigns, which otherwise can't get the correct country codes from the loop above
                if account_name in _campaign_countries:
                    countries[campaign_id] = countries[campaign_id].union(_campaign_countries[account_name])
                # print "campaign", ctuple

            adgroups[campaigns[curr][0]].append(adgroup_id)

        # insert data into new tables
        Session = sessionmaker(bind=engine)
        session = Session()

        # we need to monkeypatch flask's monkeypatch...
        session._model_changes = None

        try:
            # grab all s3 images and reproduce image hash
            bucket = env.s3.get_bucket(env.config.S3["bucket"])
            images = bucket.list('images/')

            image_hashes = defaultdict(list)
            enhanced_image_hashes = defaultdict(list)
            stmt = "SELECT t.id, t.image_uri, t.enhanced_image_uri FROM tiles t"
            for tile_id, image_uri, enhanced_image_uri in connection.execute(stmt):
                image_hashes[image_uri].append(tile_id)
                enhanced_image_hashes[enhanced_image_uri].append(tile_id)

            for image in images:
                ext = image.key.split('.')[-1]
                if ext == 'svg':
                    ext = 'svg+xml'
                elif ext == 'jpeg':
                    ext = 'jpg'
                new_hash = hashlib.sha1("data:image/%s;base64,%s" %
                                        (ext, base64.b64encode(image.get_contents_as_string()))).hexdigest()
                new_uri = image.generate_url(expires_in=0, query_auth=False)
                # remove x-amz-security-token, which is inserted even if query_auth=False
                # ref: https://github.com/boto/boto/issues/1477
                uri = furl(new_uri)
                try:
                    uri.args.pop('x-amz-security-token')
                except:
                    pass
                new_uri = uri.url

                tile_ids = image_hashes.get(new_hash)
                if tile_ids:
                    print "image: %s" % image.key
                    session.execute("update tiles set image_uri = '%s' where id in (%s)" %
                                    (new_uri, ','.join(str(tid) for tid in tile_ids)))

                tile_ids = enhanced_image_hashes.get(new_hash)
                if tile_ids:
                    print "enhanced_image: %s" % image.key
                    session.execute("update tiles set enhanced_image_uri = '%s' where id in (%s)" %
                                    (new_uri, ','.join(str(tid) for tid in tile_ids)))

            account_stmt = insert(Account).values([dict(id=aid, name=aname) for aname, aid in accounts.iteritems()])
            session.execute(account_stmt)
            session.execute("SELECT setval('accounts_id_seq', %s, false)" % (account_id + 1))

            target_query = StringIO()
            target_query.write("""insert into campaigns(id, start_date, end_date, name, paused, channel_id, account_id) values """)
            pg2_cursor = session.connection().connection.cursor()
            for campaign_tuple in campaigns.values():
                # print "%s %s" % (type(campaign_tuple), campaign_tuple)
                target_query.write(unicode(pg2_cursor.mogrify("(%s,%s,%s,%s,%s,%s,%s),", campaign_tuple)))
            session.execute(target_query.getvalue()[:-1])
            session.execute("SELECT setval('campaigns_id_seq', %s, false)" % (campaign_id + 1))

            cc_stmt = insert(CampaignCountry).values([dict(country_code=cc, campaign_id=cid)
                                                      for cid, cc_list in countries.iteritems()
                                                      for cc in cc_list])
            session.execute(cc_stmt)

            adgroup_updates = [update(Adgroup)
                               .where(Adgroup.id.in_(tuple(adgroup_ids)))
                               .values(dict(campaign_id=cid, type="directory", name="adgoup_cpmg_%d" % cid))
                               for cid, adgroup_ids in adgroups.iteritems()]
            for adgroup_stmt in adgroup_updates:
                session.execute(adgroup_stmt)
            # set the type for the suggested adgroups
            session.execute("update adgroups set type = 'suggested' where id in (select distinct adgroup_id from adgroup_sites)")

            session.commit()
        except Exception as e:
            print "Error: ", str(e)
            session.rollback()
            raise e
    finally:
        connection.close()
        print "done"
Example #48
import os
import csv
from splice.environment import Environment
from splice.webapp import create_webapp
from flask.ext.testing import TestCase

db_uri = os.environ.get('TEST_DB_URI') or 'postgres://localhost/splice_test'
env = Environment.instance(test=True, test_db_uri=db_uri)


class BaseTestCase(TestCase):
    def __init__(self, methodName='runTest'):
        self.env = env
        super(BaseTestCase, self).__init__(methodName)
        create_webapp(self.env)

    def create_app(self):
        return self.env.application

    def setUp(self):
        self.env.db.drop_all()
        self.create_app()
        self.env.db.create_all()

        def tile_values(fd):
            for line in fd:
                row = [el.decode('utf-8') for el in line.split(',')]
                yield dict(
                    zip(('target_url', 'bg_color', 'title', 'type',
                         'image_uri', 'enhanced_image_uri', 'adgroup_id',
                         'locale'), row))
Example #49
    def load(self):
        # Step needed to get around flask's import time side-effects
        from splice.environment import Environment
        env = Environment.instance()
        return env.application
Example #50
import calendar
from datetime import datetime, timedelta
from nose.tools import assert_equal
from flask import url_for, json
from mock import Mock, PropertyMock
from tests.base import BaseTestCase
from tests.test_scheduling import ScheduleTest
import splice.ingest
from splice.queries import (
    get_scheduled_distributions,
    get_all_distributions,
    get_channels)
from splice.environment import Environment

env = Environment.instance()


class TestAuthoring(BaseTestCase):
    def setUp(self):
        self.key_mock = Mock()
        self.key_mock.name = PropertyMock()
        self.bucket_mock = Mock()

        def bucket_get_key_mock(*args, **kwargs):
            return None
        self.bucket_mock.get_key = Mock(side_effect=bucket_get_key_mock)

        def get_key_mock(*args, **kwargs):
            return self.key_mock
        splice.ingest.Key = Mock(side_effect=get_key_mock)
Example #51
        return True

    # copy the data so we can modify it in place; do NOT mutate the original
    # input, since memory is much cheaper than a reader's peace of mind
    data = copy.deepcopy(data)
    is_compact = "assets" in data
    try:
        jsonschema.validate(data, get_payload_schema(is_compact))
    except jsonschema.exceptions.ValidationError, e:
        command_logger.error("ERROR: cannot validate JSON: {0}".format(
            e.message))
        exc_class, exc, tb = sys.exc_info()
        raise exc_class, exc, tb

    from splice.environment import Environment
    env = Environment.instance()
    conn = env.db.engine.connect()

    if is_compact:
        assets, distributions = data["assets"], data["distributions"]
    else:
        assets, distributions = None, data

    ingested_data = {}
    country_locales = sorted(distributions.keys())

    try:
        with session_scope(conn) as session:
            if not env.is_test:
                # lock the tables to avoid other concurrent write transactions
                session.execute(
Example #52
def generate_artifacts(data, channel_name, deploy):
    """Generate locale json files for upload to s3
    :param data: tile data for upload
    :channel_name: distribution channel name
    :deploy: tells whether to deploy to the channels
    """

    artifacts = []
    tile_index = {'__ver__': 3}
    image_index = {}
    env = Environment.instance()

    def image_add(hash, mime_type, image, locale, tile_id, *args, **kwargs):
        """
        Add an image to the index and artifact list, return file url
        """
        if hash not in image_index:
            try:
                file_ext = mime_extensions[mime_type]
            except:
                raise IngestError(
                    "Unsupported file type: {0}".format(mime_type))
            s3_key = "images/{0}.{1}.{2}".format(hash, len(image), file_ext)
            url = os.path.join(env.config.CLOUDFRONT_BASE_URL, s3_key)

            image_index[hash] = url
            artifacts.append({"mime": mime_type, "key": s3_key, "data": image})

        return image_index[hash]

    safe_channel_name = urllib.quote(channel_name)

    for country_locale, tile_data in data.iteritems():
        sug_tiles = []
        dir_tiles = []

        country_code, locale = country_locale.split("/")
        # copy data to modify inplace
        tile_data = copy.deepcopy(tile_data)

        for tile in tile_data:
            # image splitting from input
            url = image_add(*slice_image_uri(tile["imageURI"]),
                            locale=locale,
                            tile_id=tile["directoryId"])
            tile["imageURI"] = url

            if 'enhancedImageURI' in tile:
                url = image_add(*slice_image_uri(tile["enhancedImageURI"]),
                                locale=locale,
                                tile_id=tile["directoryId"])
                tile["enhancedImageURI"] = url
            if 'frecent_sites' in tile:
                sug_tiles.append(tile)
            else:
                dir_tiles.append(tile)

        # deploy both v2 and v3 versions
        if deploy:
            # v2

            legacy_tiles = copy.deepcopy(dir_tiles)
            for tile in legacy_tiles:
                # remove extra metadata
                for key in ('frequency_caps', 'adgroup_name',
                            'adgroup_categories', 'explanation',
                            'check_inadjacency', 'time_limits'):
                    tile.pop(key, None)

            legacy = json.dumps({locale: legacy_tiles}, sort_keys=True)
            legacy_hsh = hashlib.sha1(legacy).hexdigest()
            legacy_key = "{0}/{1}.{2}.json".format(safe_channel_name,
                                                   country_locale, legacy_hsh)
            artifacts.append({
                "key": legacy_key,
                "data": legacy,
            })

            # v3
            ag = json.dumps(
                {'suggested': sug_tiles, 'directory': dir_tiles},
                sort_keys=True)
            ag_hsh = hashlib.sha1(ag).hexdigest()
            ag_key = "{0}/{1}.{2}.ag.json".format(safe_channel_name,
                                                  country_locale, ag_hsh)
            artifacts.append({
                "key": ag_key,
                "data": ag,
            })
            tile_index[country_locale] = {
                'legacy': os.path.join(env.config.CLOUDFRONT_BASE_URL,
                                       legacy_key),
                'ag': os.path.join(env.config.CLOUDFRONT_BASE_URL, ag_key),
            }

    if deploy:
        # include the tile index if deployment is requested.  '__ver__' allows
        # us to make onyx backward compatible more easily
        artifacts.append({
            "key": "{0}_{1}".format(safe_channel_name,
                                    env.config.S3["tile_index_key"]),
            "data": json.dumps(tile_index, sort_keys=True),
            "force_upload": True,
        })

    # include data submission in artifacts
    data_serialized = json.dumps(compress_payload(data), sort_keys=True)
    hsh = hashlib.sha1(data_serialized).hexdigest()
    dt_str = datetime.utcnow().isoformat().replace(":", "-")
    artifacts.append({
        "key": os.path.join("/distributions", safe_channel_name,
                            "{0}.{1}.json".format(hsh, dt_str)),
        "data": data_serialized,
        "dist": True
    })

    return artifacts
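
Each artifact the function emits is a plain dict consumed by the uploader; an illustration of the two main shapes, following the key formats used above (hash and payload values are made up):

# per-country/locale tile payload:
{'key': 'desktop/US/en-US.<sha1>.ag.json', 'data': '<json string>'}
# split-out image asset:
{'key': 'images/<sha1>.<byte-length>.png', 'mime': 'image/png', 'data': '<raw bytes>'}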
Example #53
from splice.environment import Environment

env = Environment.instance("integration_tests.prod_settings.DefaultConfig")
# env = Environment.instance()

from splice.queries import tile_stats_weekly, slot_stats_weekly, tile_stats_monthly, slot_stats_monthly, \
    tile_summary_weekly, slot_summary_weekly, tile_summary_monthly, slot_summary_monthly, \
    tile_stats_daily, tile_summary_daily

with env.application.app_context():
    # TODO: check results
    conn = env.db.engine.connect()
    print "\ntile_summary_weekly"
    key, rval = tile_summary_weekly(conn, '2014-05-15')
    for x in rval:
        print x

    print "\ntile_summary_daily"
    _, rval = tile_summary_daily(conn, '2014-05-15')
    for year, week, tile_id, title, imps, clicks, pinned, blocked, spon, spon_link in rval:
        print year, week, tile_id, title, imps, clicks, pinned, blocked, spon, spon_link

    print "\ntile_stats_weekly - tile_id = 2"
    _, rval = tile_stats_weekly(conn, '2014-05-15', '2')
    for year, week, tile_id, title, country, locale, imps, clicks, pinned, blocked, spon, spon_link in rval:
        print year, week, tile_id, title, country, locale, imps, clicks, pinned, blocked, spon, spon_link

    print "\ntile_stats_weekly - tile_id = 2, country_code = US"
    _, rval = tile_stats_weekly(conn, '2014-05-15', '2', 'US')
    for year, week, tile_id, title, country, locale, imps, clicks, pinned, blocked, spon, spon_link in rval:
        print year, week, tile_id, title, country, locale, imps, clicks, pinned, blocked, spon, spon_link
Example #54
def create_webapp(*args, **kwargs):
    env = Environment.instance(*args, **kwargs)
    setup_routes(env.application)
    return env.application
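# Hedged usage sketch: serving the app with Flask's dev server. The settings
# module path passed here is an assumption, not the project's actual name.
if __name__ == '__main__':
    app = create_webapp('splice.default_settings.DefaultConfig')
    app.run(host='0.0.0.0', port=5000, debug=True)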
Example #55
def populate_countries(table):
    countries = Environment.instance()._load_countries()
    op.bulk_insert(
        table,
        [{"country_code": code, "country_name": name} for code, name in countries]
    )
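# Hedged usage sketch: invoking populate_countries from an Alembic migration's
# upgrade() step. The table definition below is an assumption for illustration.
import sqlalchemy as sa

def upgrade():
    countries = sa.Table(
        'countries', sa.MetaData(),
        sa.Column('country_code', sa.String(2)),
        sa.Column('country_name', sa.String(255)),
    )
    populate_countries(countries)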
Example #56
def insert_tile(target_url, bg_color, title, typ, image_uri, enhanced_image_uri, locale,
                frecent_sites, time_limits, frequency_caps, adgroup_name, explanation,
                check_inadjacency, channel_id, conn=None, *args, **kwargs):

    from splice.environment import Environment
    env = Environment.instance()
    now = datetime.utcnow()

    trans = None
    if conn is None:
        conn = env.db.engine.connect()
        trans = conn.begin()

    try:
        conn.execute(
            text(
                "INSERT INTO adgroups ("
                "locale, "
                "start_date, "
                "end_date, "
                "start_date_dt, "
                "end_date_dt, "
                "name, "
                "explanation, "
                "frequency_cap_daily, "
                "frequency_cap_total, "
                "check_inadjacency, "
                "channel_id, "
                "created_at"
                ") "
                "VALUES ("
                ":locale, "
                ":start_date, "
                ":end_date, "
                ":start_date_dt, "
                ":end_date_dt, "
                ":adgroup_name, "
                ":explanation, "
                ":frequency_cap_daily, "
                ":frequency_cap_total, "
                ":check_inadjacency, "
                ":channel_id, "
                ":created_at"
                ")"
            ),
            locale=locale,
            start_date=time_limits.get('start'),
            end_date=time_limits.get('end'),
            start_date_dt=time_limits.get('start_dt'),
            end_date_dt=time_limits.get('end_dt'),
            adgroup_name=adgroup_name,
            explanation=explanation,
            frequency_cap_daily=frequency_caps['daily'],
            frequency_cap_total=frequency_caps['total'],
            check_inadjacency=check_inadjacency,
            channel_id=channel_id,
            created_at=now,
        )
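        # NOTE: reading back MAX(id) below assumes no concurrent inserts; on
        # PostgreSQL, "INSERT ... RETURNING id" (see the sketch after this
        # example) is the safer way to fetch the generated id.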
        ag_id = conn.execute("SELECT MAX(id) FROM adgroups;").scalar()

        if frecent_sites:
            # bind parameters instead of interpolating values into the SQL
            # string; a site name containing a quote would break the old query
            conn.execute(
                text(
                    "INSERT INTO adgroup_sites (adgroup_id, site, created_at) "
                    "VALUES (:adgroup_id, :site, :created_at)"
                ),
                [{"adgroup_id": ag_id, "site": site, "created_at": now}
                 for site in frecent_sites]
            )

        conn.execute(
            text(
                "INSERT INTO tiles ("
                " target_url, bg_color, title, type, image_uri, enhanced_image_uri, created_at, locale, adgroup_id"
                ") "
                "VALUES ("
                " :target_url, :bg_color, :title, :type, :image_uri, :enhanced_image_uri, :created_at, :locale, :adgroup_id"
                ")"
            ),
            target_url=target_url,
            bg_color=bg_color,
            title=title,
            type=typ,
            image_uri=image_uri,
            enhanced_image_uri=enhanced_image_uri,
            created_at=now,
            locale=locale,
            adgroup_id=ag_id
        )
        tile_id = conn.execute("SELECT MAX(id) FROM tiles;").scalar()

        if trans is not None:
            trans.commit()
        return tile_id, ag_id
    except Exception:
        if trans is not None:
            trans.rollback()
        # re-raise with the original traceback (Python 2 loses it on "raise e")
        raise
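# As the comment above notes, the MAX(id) lookups are racy under concurrent
# writers. A minimal sketch of the safer, PostgreSQL-only pattern (an
# assumption for illustration, not the project's actual code):
from sqlalchemy import text

def insert_adgroup_returning_id(conn, locale, now):
    # INSERT and read back the generated primary key in one round trip
    return conn.execute(
        text(
            "INSERT INTO adgroups (locale, created_at) "
            "VALUES (:locale, :created_at) RETURNING id"
        ),
        locale=locale, created_at=now,
    ).scalar()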
Example #57
def main():
    # get argument
    parser = OptionParser(
        usage='Usage: %prog [<CDN_URL>]'
        '\n\nArguments:'
        '\n  CDN_URL    Of the format "<scheme>://<fqdn>".'
        ' Trailing "/" not allowed.'
        '\n\nExamples:'
        '\n  %prog https://tiles.cdn.mozilla.net'
    )
    parser.set_defaults(
        quiet=False,
        verbose=False,
    )
    parser.add_option(
        '-q', '--quiet',
        action='store_true',
        dest='quiet',
        help="Don't report NOTICE",
    )
    parser.add_option(
        '-v', '--verbose',
        action='store_true',
        dest='verbose',
        help='Report SUCCESS',
    )
    options, args = parser.parse_args()

    try:
        from splice.environment import Environment
        config = Environment.instance().config
        cdn = 'https://%s.s3.amazonaws.com' % config.S3['bucket']
        tile_index_key = config.S3['tile_index_key']
    except Exception:
        cdn = 'https://tiles.cdn.mozilla.net'
        tile_index_key = 'tile_index_v3.json'

    channels = [
        'desktop',
        'android',
        'desktop-prerelease',
        'hello'
    ]

    if len(args) == 1:
        cdn = args.pop()
    elif len(args) > 1:
        parser.parse_args(['-h'])

    if not options.quiet:
        print(
            'NOTICE: crawling: %s/%s_%s' %
            (cdn, tuple(channels), tile_index_key)
        )
        print('NOTICE: calculating tiles urls')

    errors = []

    # extract tiles urls from tile index
    try:
        urls = [
            tiles_url
            for index in validate(
                grequests.imap(
                    (grequests.get('%s/%s_%s' % (cdn, channel, tile_index_key), allow_redirects=False,)
                     for channel in channels),
                    size=10
                ),
                options.verbose,
                errors,
            )
            for key, value in index.json().iteritems()
            if '/' in key
            for tiles_url in value.values()
        ]

        tiles_urls = set()
        for url in urls:
            if type(url) is list:
                tiles_urls.update(url)
            else:
                tiles_urls.add(url)

        if not options.quiet:
            print('NOTICE: tiles urls extracted: %s' % len(tiles_urls))
            print('NOTICE: calculating image urls')

        # extract image urls from tiles
        image_urls = set([
            image_url
            for tiles in validate(
                grequests.imap(
                    (grequests.get(tiles_url, allow_redirects=False)
                     for tiles_url in tiles_urls),
                    size=10
                ),
                options.verbose,
                errors,
            )
            for value_x in tiles.json().values()
            for value_y in value_x
            for key, image_url in value_y.iteritems()
            if key in ['imageURI', 'enhancedImageURI']
        ])

        if not options.quiet:
            print('NOTICE: image urls extracted: %s' % len(image_urls))
            print('NOTICE: validating image urls')

        # Two things to notice here:
        # 1. expanding the list comprehension is necessary to get the 'validate'
        #    step above to actually evaluate (it's lazy.)
        # 2. the actual value of the list comprehension is dropped, not returned.
        [
            valid.url
            for valid in validate(
                grequests.imap(
                    (grequests.head(image_url, allow_redirects=False)
                     for image_url in image_urls),
                    size=10
                ),
                options.verbose,
                errors,
            )
        ]
    except Exception as e:
        msg = 'ERROR: %s' % e
        print(msg)
        print(traceback.format_exc())
        errors.append(msg)

    if errors:
        exit(1)
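# validate() is used above but not defined in this snippet. A plausible
# sketch (an assumption, not the project's actual implementation): pass
# through responses with a 200 status and collect everything else in errors.
def validate(responses, verbose, errors):
    for response in responses:
        if response is not None and response.status_code == 200:
            if verbose:
                print('SUCCESS: %s' % response.url)
            yield response
        else:
            msg = 'ERROR: %s' % (
                response.url if response is not None else 'request failed')
            print(msg)
            errors.append(msg)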
Example #58
from datetime import datetime
from sqlalchemy import text
from splice.environment import Environment

db = Environment.instance().db
metadata = db.metadata


class Channel(db.Model):
    __tablename__ = "channels"

    id = db.Column(db.Integer(),
                   autoincrement=True,
                   primary_key=True,
                   info={"identity": [1, 1]})
    name = db.Column(db.String(32), nullable=False, unique=True)
    created_at = db.Column(db.DateTime(), default=datetime.utcnow)


class Distribution(db.Model):
    __tablename__ = "distributions"

    id = db.Column(db.Integer(),
                   autoincrement=True,
                   primary_key=True,
                   info={"identity": [1, 1]})
    url = db.Column(db.Text(), nullable=False)
    channel_id = db.Column(db.Integer(),
                           db.ForeignKey('channels.id'),
                           nullable=False)
    deployed = db.Column(db.Boolean(), default=False)
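# Hedged usage sketch against these models: list the deployed distributions
# for one channel (the channel name 'desktop' is an assumption).
def deployed_distributions(name='desktop'):
    channel = db.session.query(Channel).filter(Channel.name == name).one()
    return (db.session.query(Distribution)
            .filter(Distribution.channel_id == channel.id)
            .filter(Distribution.deployed.is_(True))
            .all())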
Example #59
def distribute(data, channel_id, deploy, scheduled_dt=None):
    """Upload tile data to S3
    :data: tile data
    :channel_id: channel id for which to distribute tile data
    :deploy: whether to deploy tiles to firefox immediately
    :scheduled_dt: an optional scheduled date in the future for deploy. overrides deploy
    """
    command_logger.info("Generating Data")

    from splice.models import Channel
    from splice.environment import Environment

    env = Environment.instance()

    if scheduled_dt:
        now = datetime.utcnow()
        if now > scheduled_dt:
            raise ScheduleError("scheduled date needs to be in the future")
        elif deploy:
            raise ScheduleError(
                "cannot specify deploy and schedule at the same time")

    channel = (env.db.session.query(Channel).filter(
        Channel.id == channel_id).one())

    artifacts = generate_artifacts(data, channel.name, deploy)

    command_logger.info("Uploading to S3 for channel {0}".format(channel.name))

    bucket = Environment.instance().s3.get_bucket(
        Environment.instance().config.S3["bucket"])
    cors = CORSConfiguration()
    cors.add_rule("GET", "*", allowed_header="*")
    bucket.set_cors(cors)

    distributed = []

    headers = {
        'Cache-Control': 'public, max-age=31536000',
        'Content-Disposition': 'inline',
    }

    # upload individual files
    for file in artifacts:
        if "mime" in file:
            headers['Content-Type'] = file["mime"]
        else:
            # default to JSON for artifacts
            headers['Content-Type'] = "application/json"

        key = bucket.get_key(file["key"])
        uploaded = False

        if key is None or file.get("force_upload"):
            key = Key(bucket)
            key.name = file["key"]
            key.set_contents_from_string(file["data"], headers=headers)
            key.set_acl("public-read")
            uploaded = True

        url = key.generate_url(expires_in=0, query_auth=False)

        # remove x-amz-security-token, which is inserted even if query_auth=False
        # ref: https://github.com/boto/boto/issues/1477
        uri = furl(url)
        try:
            uri.args.pop('x-amz-security-token')
        except KeyError:
            pass
        url = uri.url

        if uploaded:
            command_logger.info("UPLOADED {0}".format(url))
        else:
            command_logger.info("SKIPPED {0}".format(url))
        distributed.append([url, uploaded])

        if file.get("dist", False):
            insert_distribution(url, channel_id, deploy, scheduled_dt)

    return distributed
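# Hedged usage sketch: schedule a distribution for tomorrow instead of
# deploying immediately (channel_id=1 and `data` are assumptions); per
# insert_distribution, a scheduled date forces deployed to False.
from datetime import datetime, timedelta

for url, uploaded in distribute(data, channel_id=1, deploy=False,
                                scheduled_dt=datetime.utcnow() + timedelta(days=1)):
    command_logger.info("{0} uploaded={1}".format(url, uploaded))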
Example #60
def get_possible_distributions(today=None, channel_id=None):
    """Generate all possible distributions for a given date and channel.
    The result tiles are grouped by the (country, locale, channel_id),
    a tile index file will be generated as well as the last item of result.
    Note that all tiles in a distribution will be ordered by the created
    timestamp descendingly.

    Params:
        today: date, the target date on which to produce the distributions.
        The default is None, which means use the current date.
        channel_id: int, the target channel_id. Will produce distributions
        for all the channels if not specified.
    Returns:
        A distribution dictionary of (channel, distribution_list) type,
        where channel is the name of the channel, and distribution_list
        consists of all distributions for that channel. For example:
        {
            "desktop": [
                {
                    "key": "desktop/US/en-US/some_hash_0.json",
                    "data": {"distribution_payload"}
                },
                {
                    "key": "desktop/CA/en-US/some_hash_1.json",
                    "data": {"distribution_payload"}
                },

                ...,

                {
                    "key": "desktop/CA/en-GB/some_hash_2.json",
                    "data": {"distribution_payload"}
                },
                {
                    "key": "desktop_tile_index.json"
                    "data": {"tile_index_payload"},
                    "force_upload": True
                }
            ]
        }

    """
    # TODO([email protected]): Clean up suggested tiles
    from splice.environment import Environment

    env = Environment.instance()
    if today is None:
        today = datetime.utcnow().date()

    query = (env.db.session.query(Tile)
             .filter(Tile.paused == false())
             .filter(Adgroup.paused == false())
             .filter(Campaign.paused == false())
             .filter(Campaign.end_date >= today)
             .filter(Campaign.start_date <= today)
             .join(Adgroup).join(Campaign).join(CampaignCountry)
             .order_by(desc(Tile.created_at)))

    if channel_id is not None:
        query = query.filter(Campaign.channel_id == channel_id)

    rows = query.all()
    bucketer = load_bucketer()
    artifacts = defaultdict(list)
    tiles = {}
    for tile in rows:
        locale = tile.adgroup.locale
        countries = tile.adgroup.campaign.countries
        channel = tile.adgroup.channel.name
        safe_channel_name = urllib.quote(channel)

        new_tiles = _create_tiles(tile, bucketer)
        legacy_tiles = _create_tiles(tile, bucketer, True)
        suggested = (tile.adgroup.type == "suggested"
                     and len(tile.adgroup.categories) > 0)

        for country in countries:
            key = (safe_channel_name, country.country_code, locale)
            value = tiles.setdefault(
                key, Dists(legacy=[], directory=[], suggested=[]))
            if suggested:
                value.suggested.extend(new_tiles)
            else:
                value.directory.extend(new_tiles)
                value.legacy.extend(legacy_tiles)

    tile_index = {}
    for (channel, country, locale), (legacy, directory,
                                     _suggested) in tiles.items():
        country_locale = "%s/%s" % (country, locale)
        legacy_keys, ag_keys = [], []

        # v2
        for legacy_tiles in multiplex_directory_tiles(legacy):
            legacy_json = json.dumps({locale: legacy_tiles}, sort_keys=True)
            legacy_hsh = hashlib.sha1(legacy_json).hexdigest()
            legacy_key = "{0}/{1}.{2}.json".format(channel, country_locale,
                                                   legacy_hsh)
            legacy_keys.append(legacy_key)
            artifacts[channel].append({"key": legacy_key, "data": legacy_json})

        # v3
        for ag_tiles in multiplex_directory_tiles(directory):
            ag = json.dumps({'suggested': [], 'directory': ag_tiles},
                            sort_keys=True)
            ag_hsh = hashlib.sha1(ag).hexdigest()
            ag_key = "{0}/{1}.{2}.ag.json".format(channel, country_locale,
                                                  ag_hsh)
            ag_keys.append(ag_key)
            artifacts[channel].append({
                "key": ag_key,
                "data": ag,
            })

        tile_index_channel = tile_index.setdefault(channel, {'__ver__': 3})
        all_legacy_keys = [
            os.path.join(env.config.CLOUDFRONT_BASE_URL, k)
            for k in legacy_keys
        ]
        all_ag_keys = [
            os.path.join(env.config.CLOUDFRONT_BASE_URL, k) for k in ag_keys
        ]
        tile_index_channel[country_locale] = {
            'legacy': all_legacy_keys,
            'ag': all_ag_keys
        }

    # the index files
    for channel, tile_index_channel in tile_index.items():
        artifacts[channel].append({
            "key": "{0}_{1}".format(channel, env.config.S3["tile_index_key"]),
            "data": json.dumps(tile_index_channel, sort_keys=True),
            "force_upload": True,
        })
    return artifacts
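# Hedged usage sketch: inspect tomorrow's distributions for a single channel
# (channel_id=1 is an assumption) without uploading anything.
from datetime import date, timedelta

artifacts = get_possible_distributions(today=date.today() + timedelta(days=1),
                                       channel_id=1)
for channel, files in artifacts.items():
    for f in files:
        print channel, f["key"], len(f["data"])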