Example #1
0
def main():
    """Build the analytics CSV report for the profile named on the command line.

    Six command-line arguments are expected: identity, account, webproperty,
    profile, start date and end date. The first four are passed to
    ``ga.authenticate``; the two dates are not read in this function
    (presumably consumed by the query helpers -- verify against them).

    When fewer than six arguments are supplied the usage line is printed and
    the function returns without contacting Google Analytics.
    """
    if len(sys.argv) < 7:
        print("Analytics.py <identity> <account> <webproperty> <profile> <start date> <end date>")
        # Stop here: carrying on would only fail on the missing argv entries.
        return

    profile = ga.authenticate(
        identity=sys.argv[1], save=True, interactive=True,
        account=sys.argv[2], webproperty=sys.argv[3], profile=sys.argv[4]
    )

    # Filter expression reused by every query below.
    bad_referrers = Referrers.make_bad_referrer_filter(profile)

    total_overall_query_rows = make_query(profile, filters=bad_referrers).rows
    add_to_csv("Total Overall,Overall", total_overall_query_rows)
    device_query_rows = make_query(profile, dimensions=['deviceCategory'], filters=bad_referrers).rows
    add_to_csv("Total Overall", device_query_rows)

    grouping_overall_query_rows = make_query(profile, dimensions=['channelGrouping'], filters=bad_referrers).rows
    add_to_csv("Grouping Overall", grouping_overall_query_rows)
    # The "%s" placeholder is left in the filter template for add_device_data to fill in.
    add_device_data(profile, "Grouping Overall", grouping_overall_query_rows, "ga:channelGrouping==%s;" + bad_referrers)

    add_channel_grouping_data(profile, "Social", filter_social)
    add_channel_grouping_data(profile, "Organic", filter_organic)
    add_channel_grouping_data(profile, "Direct", filter_direct)
    add_channel_grouping_data(profile, "Referral", "%s;%s" % (bad_referrers, filter_referral))

    output_csv()
Example #2
0
def from_blueprint(scope, src):
    """Authenticate with a blueprint's settings and return the blueprint's queries.

    ``src`` is parsed as YAML and wrapped in ``ga.Blueprint``. The blueprint's
    ``identity`` (when truthy) and ``scope`` are merged, in that order, into
    the keyword arguments handed to ``ga.authenticate``. The ``scope``
    parameter itself is accepted but not used.
    """
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects; confirm ``src`` is trusted or move to yaml.safe_load.
    blueprint = ga.Blueprint(yaml.load(src))

    auth_kwargs = dict(blueprint.identity or {})
    auth_kwargs.update(blueprint.scope)

    authenticated = ga.authenticate(interactive=True, save=True, **auth_kwargs)
    return blueprint.queries(authenticated)
Example #3
0
def cli(ctx, identity, account, webproperty, profile, version):
    """Authenticate, store the result on ``ctx.obj``, and optionally echo the version.

    Authentication always runs (interactively, saving credentials) before the
    ``version`` flag is looked at.
    """
    auth_options = dict(
        identity=identity,
        account=account,
        webproperty=webproperty,
        profile=profile,
        interactive=True,
        save=True,
    )
    ctx.obj = ga.authenticate(**auth_options)

    if not version:
        return
    click.echo('googleanalytics {}'.format(ga.__version__))
 def __init__(self, startDate, endDate, heading, query=""):
     """Authenticate, pick a profile and start a document with ``heading``.

     Args:
         startDate: stored unchanged on ``self.startDate``.
         endDate: stored unchanged on ``self.endDate``.
         heading: text passed to ``Document.add_heading`` with ``0`` as the
             second argument.
         query: stored on ``self.query_path``; defaults to an empty string.
     """
     # Close the credentials file deterministically instead of leaking the handle.
     with open('secret/credentials.json') as credentials_file:
         credentials = json.load(credentials_file)
     accounts = ga.authenticate(**credentials)
     # Only the first account's first web property is used.
     self.profile = accounts[0].webproperties[0].profile
     self.document = Document('secret/default.docx')
     self.document.add_heading(heading, 0)
     self.startDate = startDate
     self.endDate = endDate
     self.sessions = 0
     self.query_path = query
Example #5
0
 def setUp(self):
     """Authenticate and cache the first account, web property, profile and query.

     Raises:
         Exception: if authentication returns no accounts.
     """
     accounts = ga.authenticate()
     if not accounts:
         # Adjacent literals keep the message free of the indentation that a
         # backslash continuation inside the string would embed.
         raise Exception(
             "Cannot proceed with unit testing: "
             "the authorized Google account does not use Google Analytics.")

     self.account = accounts[0]
     self.webproperty = self.account.webproperties[0]
     self.profile = self.webproperty.profiles[0]
     self.query = self.profile.core.query
Example #6
0
 def setUp(self):
     """Authenticate and cache the first account, web property, profile and query.

     Raises:
         Exception: if authentication returns no accounts.
     """
     accounts = ga.authenticate()
     if not accounts:
         # Adjacent literals keep the message free of the indentation that a
         # backslash continuation inside the string would embed.
         raise Exception(
             "Cannot proceed with unit testing: "
             "the authorized Google account does not use Google Analytics.")

     self.account = accounts[0]
     self.webproperty = self.account.webproperties[0]
     self.profile = self.webproperty.profiles[0]
     self.query = self.profile.core.query
Example #7
0
 def __init__(self, startDate, endDate, heading, query=""):
     """Authenticate, pick a profile and start a document with ``heading``.

     Args:
         startDate: stored unchanged on ``self.startDate``.
         endDate: stored unchanged on ``self.endDate``.
         heading: text passed to ``Document.add_heading`` with ``0`` as the
             second argument.
         query: stored on ``self.query_path``; defaults to an empty string.
     """
     # Close the credentials file deterministically instead of leaking the handle.
     with open('secret/credentials.json') as credentials_file:
         credentials = json.load(credentials_file)
     accounts = ga.authenticate(**credentials)
     # Only the first account's first web property is used.
     self.profile = accounts[0].webproperties[0].profile
     self.document = Document('secret/default.docx')
     self.document.add_heading(heading, 0)
     self.startDate = startDate
     self.endDate = endDate
     self.sessions = 0
     self.query_path = query
Example #8
0
def main():
    """Print the cleared referrers for the profile named on the command line.

    Six command-line arguments are expected: identity, account, webproperty,
    profile, start date and end date. The first four are passed to
    ``ga.authenticate``; the two dates are not read in this function
    (presumably consumed by ``get_cleared_referrers`` -- verify).

    When fewer than six arguments are supplied the usage line is printed and
    the function returns without contacting Google Analytics.
    """
    if len(sys.argv) < 7:
        print("Referrers.py <identity> <account> <webproperty> <profile> <start date> <end date>")
        # Stop here: carrying on would only fail on the missing argv entries.
        return

    profile = ga.authenticate(
        identity=sys.argv[1], save=True, interactive=True,
        account=sys.argv[2], webproperty=sys.argv[3], profile=sys.argv[4]
    )

    referrers = get_cleared_referrers(profile).serialize(format='ascii', with_metadata=True)
    print(referrers)
Example #9
0
def cli(ctx, identity, account, webproperty, profile, version):
    """Authenticate, keep the result on ``ctx.obj``, then echo the version if asked.

    The authentication call is made unconditionally, in interactive mode and
    with credential saving enabled.
    """
    auth_options = {
        'identity': identity,
        'account': account,
        'webproperty': webproperty,
        'profile': profile,
        'interactive': True,
        'save': True,
    }
    ctx.obj = ga.authenticate(**auth_options)

    if not version:
        return
    click.echo('googleanalytics {}'.format(ga.__version__))
Example #10
0
 def _build_profile(klass, profile_config, global_config):
     """Authenticate with the OAuth2 credentials configured for a profile.

     The credentials reference is taken from ``profile_config["credentials"]``
     and falls back to ``global_config["default_oa2_credentials"]``. It is
     resolved through ``OA2CredentialsFactory.get_credentials`` and the result
     of ``googleanalytics.authenticate`` is returned.
     """
     fallback_reference = global_config.get("default_oa2_credentials")
     credentials_reference = profile_config.get("credentials", fallback_reference)
     resolved = OA2CredentialsFactory.get_credentials(
         global_config, credentials_reference)

     auth_kwargs = {
         "identity": profile_config.get("identity"),
         "client_id": resolved.client_id,
         "client_secret": resolved.client_secret,
         "access_token": resolved.access_token,
         "account": profile_config.get("account"),
         "webproperty": profile_config.get("webproperty"),
     }
     return googleanalytics.authenticate(**auth_kwargs)
Example #11
0
 def setUp(self):
     """Authenticate from environment variables and cache the first profile.

     The client id, client secret and refresh token are read from
     ``GOOGLE_ANALYTICS_CLIENT_ID``, ``GOOGLE_ANALYTICS_CLIENT_SECRET`` and
     ``GOOGLE_ANALYTICS_REFRESH_TOKEN``.

     Raises:
         KeyError: if one of those environment variables is not set.
         Exception: if authentication returns no accounts.
     """
     accounts = ga.authenticate(
         client_id=os.environ['GOOGLE_ANALYTICS_CLIENT_ID'],
         client_secret=os.environ['GOOGLE_ANALYTICS_CLIENT_SECRET'],
         refresh_token=os.environ['GOOGLE_ANALYTICS_REFRESH_TOKEN'],
     )
     if not accounts:
         # Adjacent literals keep the message free of the indentation that a
         # backslash continuation inside the string would embed.
         raise Exception(
             "Cannot proceed with unit testing: "
             "the authorized Google account does not use Google Analytics.")

     self.account = accounts[0]
     self.webproperty = self.account.webproperties[0]
     self.profile = self.webproperty.profiles[0]
     self.query = self.profile.core.query
Example #12
0
def ganalytics_connect(client_email, private_key):
    """
    Authenticate with Google Analytics and return a single profile.

    The returned profile is the ``profile`` attribute of the first web
    property of the first account. The original author notes that the result
    does not seem to be memoizable: Google answers with
    `googleapiclient.errors.HttpError: 401`.
    """
    # The key can arrive with literal backslash-n sequences instead of real
    # line breaks (the original author saw this with values stored in Redis),
    # so convert them before authenticating.
    normalized_key = private_key.replace("\\n", "\n")
    accounts = ga.authenticate(client_email=client_email,
                               private_key=normalized_key)

    # Queries are issued per profile and one profile is enough for now.
    first_property = accounts[0].webproperties[0]
    return first_property.profile
def ga_properties(tokens):
    """
    List the web properties reachable with ``tokens``, with their profile names.

    Returns a list of ``{'property': <url>, 'profiles': [<profile name>, ...]}``
    dicts; web properties whose ``url`` is falsy are left out.
    """
    found = []
    for account in googleanalytics.authenticate(**tokens):
        for web_property in account.webproperties:
            url = web_property.url
            if not url:
                continue
            profile_names = [profile.name for profile in web_property.profiles]
            found.append({'property': url, 'profiles': profile_names})
    return found
def ga_properties(tokens):
    """
    List the web properties reachable with ``tokens``, with their profile names.

    Returns a list of ``{'property': <url>, 'profiles': [<profile name>, ...]}``
    dicts; web properties whose ``url`` is falsy are left out.
    """
    found = []
    for account in googleanalytics.authenticate(**tokens):
        for web_property in account.webproperties:
            url = web_property.url
            if not url:
                continue
            profile_names = [profile.name for profile in web_property.profiles]
            found.append({'property': url, 'profiles': profile_names})
    return found
Example #15
0
def query(identity=None, account=None, webproperty=None, profile=None, blueprint=None):
    """Run every query described by a blueprint and echo the reports as JSON.

    Only the blueprint path is implemented: ``blueprint`` is parsed as YAML,
    its ``identity`` and ``scope`` become the ``ga.authenticate`` keyword
    arguments, and each resulting query is fetched and serialized. The
    ``identity``, ``account``, ``webproperty`` and ``profile`` arguments are
    not read.

    Raises:
        NotImplementedError: when ``blueprint`` is falsy.
    """
    if not blueprint:
        raise NotImplementedError()

    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects; confirm the blueprint source is trusted or move to
    # yaml.safe_load.
    blueprint = ga.Blueprint(yaml.load(blueprint))

    auth_kwargs = dict(blueprint.identity or {})
    auth_kwargs.update(blueprint.scope)
    profile = ga.authenticate(interactive=True, save=True, **auth_kwargs)

    reports = []
    for item in blueprint.queries(profile):
        fetched = item.get()
        reports.append({
            'title': item.title,
            'query': item.raw,
            'results': fetched.serialize(),
        })

    click.echo(json.dumps(reports, indent=2))
Example #16
0
def trend(counts):
    """Fit a straight line to the log of the counts and return its coefficients.

    ``counts`` is an iterable of ``(date, count)`` pairs. Dates are converted
    to day offsets relative to today, counts are log-transformed, and the
    least-squares solution ``[intercept, slope]`` is returned.
    """
    dates, values = zip(*counts)

    day_offsets = numpy.array([day.toordinal() for day in dates])
    day_offsets -= datetime.date.today().toordinal()

    # Rows are the constant term and the day offset; transposed below so that
    # each observation becomes one row of the system.
    design = numpy.array([numpy.ones(len(day_offsets)), day_offsets])
    log_values = numpy.log(numpy.array(values))

    solution = numpy.linalg.lstsq(design.T, log_values)
    return solution[0]

# Authenticate against the Illinois Sunshine view.
profile = ga.authenticate(
    identity='sunspot',
    account='Illinois Campaign for Political Reform',
    webproperty='Illinois Sunshine',
    profile='Illinois Sunshine',
)

# Alternative target (Chicago Councilmatic), kept for reference:
# profile = ga.authenticate(
#     identity='sunspot',
#     account='Councilmatic',
#     webproperty='Chicago Councilmatic',
#     profile='Chicago Councilmatic',
# )


# Daily pageview totals over the trailing WINDOW days, keyed by date.
totals = (
    profile.core.query
    .metrics('pageviews')
    .daily(days=-WINDOW)
)

totals = {day: views for day, views in totals.rows}

pages = profile.core.query.metrics('pageviews').\
                           dimensions('pagepath').\
Example #17
0
 def __init__(self, yaml_file: str):
     """Load settings from ``yaml_file`` and authenticate with their credentials.

     The ``client_email`` and ``private_key`` found under
     ``settings.credentials`` are passed to ``ga.authenticate``; the result is
     kept on ``self.accounts``.
     """
     self.settings = Settings(yaml_file)
     auth_kwargs = {
         'client_email': self.settings.credentials.client_email,
         'private_key': self.settings.credentials.private_key,
     }
     self.accounts = ga.authenticate(**auth_kwargs)
Example #18
0
 def authenticated_fn(identity=None, *vargs, **kwargs):
     """Authenticate for ``identity`` and call ``fn`` with the accounts inserted.

     ``fn`` receives ``identity``, the authenticated accounts, and then any
     remaining positional and keyword arguments unchanged.
     """
     auth_options = {'identity': identity, 'interactive': True, 'save': True}
     accounts = ga.authenticate(**auth_options)
     return fn(identity, accounts, *vargs, **kwargs)
Example #19
0
def _read_ga_credentials(path='service_account.json'):
    """Return the credentials dict stored at ``path``, creating the file if missing.

    When the file does not exist, ``ga.authorize()`` is run, its serialized
    result is written to ``path`` as JSON, and that dict is returned.
    """
    if os.path.exists(path):
        with open(path) as handle:
            return json.load(handle)

    print("file not exist")
    # (this will be interactive, as you'll need to confirm in a browser window)
    # The credentials object is turned into a plain dictionary before saving.
    credentials = ga.authorize().serialize()
    with open(path, 'w') as handle:
        json.dump(credentials, handle)
    return credentials


def _format_daily(dates, values):
    """Return one "<date> <value>" line per day, joined with newlines."""
    return "\n".join("{} {}".format(day, value) for day, value in zip(dates, values))


def readData(days=3):
    """Pull daily organic and paid traffic metrics for the last ``days`` days.

    Credentials are loaded from ``service_account.json`` (created through
    ``ga.authorize()`` when absent). The accounts, web properties and profiles
    of the first account / first web property are listed on stdout, and the
    profile named 'ACT 1 All Traffic' is queried for each of the ``days`` days
    before today.

    Args:
        days: number of past days to report on, starting with yesterday.

    Returns:
        A list with two tuples per day, in this order:
        ``(date, "organic", unique visitors, avg time on site,
        avg pages per visitor, return visitors)`` and
        ``(date, "paid", unique visitors, avg time on site, None,
        return visitors)``. The organic average is ``None`` for a day with
        zero unique visitors.
    """
    # get connected to Google Analytics
    accounts = ga.authenticate(**_read_ga_credentials())

    # list of accounts, properties and views
    print("accounts size: " + str(len(accounts)))
    for idx, account in enumerate(accounts):
        print(idx, account.name)

    web_properties = accounts[0].webproperties
    print("properties size:" + str(len(web_properties)))
    for idx, web_property in enumerate(web_properties):
        print(idx, web_property.name)

    profiles = accounts[0].webproperties[0].profiles
    print("profiles size: " + str(len(profiles)))
    for idx, profile in enumerate(profiles):
        print(idx, profile.name)

    # choose profile
    profile = profiles['ACT 1 All Traffic']

    # the `days` most recent full days, newest first (yesterday, the day before, ...)
    today = datetime.date.today()
    dates = [today - datetime.timedelta(count) for count in range(1, days + 1)]

    # Pull data for organic traffic
    # 1. unique visitors
    organic_unique_visitors = [
        profile.core.query.set(metrics=['ga:users'])
        .segment('organic traffic').range(day).value
        for day in dates
    ]
    print("Organic unique visitors: ", _format_daily(dates, organic_unique_visitors))

    # 2. average time on site
    organic_avg_time_on_site = [
        profile.core.query.metrics('avgSessionDuration')
        .segment('organic traffic').range(day).value
        for day in dates
    ]
    print("Organic avg time on site: ", _format_daily(dates, organic_avg_time_on_site))

    # 3. average pages viewed by each organic unique visitor
    organic_pages_viewed = [
        profile.core.query.metrics('pageviews')
        .segment('organic traffic').range(day).value
        for day in dates
    ]
    # A day without visitors has no meaningful average; report None instead
    # of failing the whole run with ZeroDivisionError.
    organic_avg_pages_viewed = [
        pages / visitors if visitors else None
        for pages, visitors in zip(organic_pages_viewed, organic_unique_visitors)
    ]
    print("Organic avg pages viewed: ", _format_daily(dates, organic_avg_pages_viewed))

    # 4. return visitors
    organic_return_visitors = []
    for day in dates:
        report = (
            profile.core.query.set(metrics=['ga:users']).set(dimensions=['ga:channelGrouping'])
            .segment('returning users').range(day).get()
        )
        users_by_channel = {row[0]: row[1] for row in report.rows}
        # No 'Organic Search' row means no returning organic users that day.
        organic_return_visitors.append(users_by_channel.get('Organic Search', 0))
    print("Organic return visitors: ", _format_daily(dates, organic_return_visitors))

    # Pull data for paid traffic (unique visitors, avg time on site, return visitors)
    # 1. unique visitors
    paid_unique_visitors = [
        profile.core.query.set(metrics=['ga:users'])
        .segment('paid traffic').range(day).value
        for day in dates
    ]
    print("Paid unique visitors: ", _format_daily(dates, paid_unique_visitors))

    # 2. average time on site
    paid_avg_time_on_site = [
        profile.core.query.metrics('avgSessionDuration')
        .segment('paid traffic').range(day).value
        for day in dates
    ]
    print("Paid avg time on site: ", _format_daily(dates, paid_avg_time_on_site))

    # 3. return visitors = unique visitors - new visitors; the unique visitor
    # counts fetched above are reused rather than queried a second time.
    paid_return_visitors = []
    for day, unique_visitors in zip(dates, paid_unique_visitors):
        new_visitors = (
            profile.core.query.set(metrics=['ga:newUsers'])
            .segment('paid traffic').range(day).value
        )
        paid_return_visitors.append(unique_visitors - new_visitors)
    print("Paid return visitors: ", _format_daily(dates, paid_return_visitors))

    # get output for writing into DWH
    final_res = []
    per_day = zip(
        dates,
        organic_unique_visitors, organic_avg_time_on_site,
        organic_avg_pages_viewed, organic_return_visitors,
        paid_unique_visitors, paid_avg_time_on_site, paid_return_visitors,
    )
    for (day, org_users, org_time, org_pages, org_return,
         paid_users, paid_time, paid_return) in per_day:
        final_res.append((day, "organic", org_users, org_time, org_pages, org_return))
        final_res.append((day, "paid", paid_users, paid_time, None, paid_return))

    print(final_res)

    return final_res
Example #20
0
import json
import googleanalytics as ga
import urllib2
import re
from math import floor

from settings import GA_PROFILE

# Load the stored credentials; the 'analytics' entry holds the keyword
# arguments for ga.authenticate.
with open('cred.json') as f:
	cred = json.load(f)

# Authenticate once at import time. GA_PROFILE (from settings) selects the
# web property of the first account whose profile is queried below.
accounts = ga.authenticate(**cred['analytics'])
profile = accounts[0].webproperties[GA_PROFILE].profile

def get_report(word,top_number,from_to_dates, path):

	query = profile.core.query.total(from_to_dates[0],from_to_dates[1])

	if top_number > 20:
		top_number = 20
	elif top_number <= 0:
		top_number = 10

	if word == 'top':
		result = query.dimensions('ga:pagePath').metrics('pageviews', 'unique pageviews', 'ga:avgTimeOnPage', 'ga:bounceRate', 'entrances', 'exits').sort('pageviews', descending=True).limit(top_number)
	else:
		result = query.dimensions('ga:pagePath').metrics('pageviews', 'unique pageviews', 'ga:avgTimeOnPage', 'ga:bounceRate', 'entrances', 'exits').sort('pageviews', descending=True).limit(top_number).filter(pagepath=path)
		top_5_sources = query.dimensions('ga:source').metrics('ga:hits').sort('ga:hits', descending=True).limit(5).filter(pagepath=path)

	with open('titles.json') as t:
		titles = json.load(t)