コード例 #1
0
def get_cdb_searches(today, lapse='month'):
    """Aggregate portal search events from CartoDB per publisher.

    Selects rows of ``query_log_master`` logged by the ``portal-prod``
    client whose type is not ``'download'`` and whose
    ``results_by_resource`` is neither ``'{}'`` nor empty, restricted in
    time by ``add_time_limit``.

    Args:
        today: reference date forwarded to ``add_time_limit``.
        lapse: period forwarded to ``add_time_limit`` (default ``'month'``).

    Returns:
        dict keyed by ``"<inst>-<col>"``. Each entry is created with the
        keys ``searches``, ``records_searched``, ``list_records_searched``,
        ``url``, ``inst`` and ``col`` and is then replaced by whatever
        ``get_cdb_stats`` returns for it. If ``get_inst_col`` yields
        ``(None, None)`` the key is ``"None-None"``.
    """
    query = "select * from query_log_master"

    query += " where client='portal-prod'"
    query += " and type != 'download' and results_by_resource != '{}' and results_by_resource != ''"

    query = add_time_limit(query=query, today=today, lapse=lapse)
    searches = cartodb_query(query)

    pubs = {}

    # get_inst_col queries CartoDB on every call and the same resource URL
    # shows up in many search rows, so remember lookups that succeeded.
    # Failed lookups are deliberately not cached: they are retried the next
    # time the URL is seen, exactly as before.
    resolved = {}

    for search in searches:
        res_count = json.loads(search['results_by_resource'])
        for url, count in res_count.items():
            if url in resolved:
                inst, col = resolved[url]
            else:
                inst, col = get_inst_col(url)
                if inst is not None:
                    resolved[url] = (inst, col)

            pub = "{0}-{1}".format(inst, col)
            if pub not in pubs:
                pubs[pub] = {
                    'searches': 1,
                    'records_searched': count,
                    'list_records_searched': [count],
                    'url': url,
                    'inst': inst,
                    'col': col
                }
            else:
                pubs[pub]['searches'] += 1
                pubs[pub]['records_searched'] += count
                pubs[pub]['list_records_searched'].append(count)
            pubs[pub] = get_cdb_stats(search, pubs[pub], from_download=False)

    return pubs
コード例 #2
0
def get_cdb_downloads(lapse, today):
    """Fetch the download events logged by the production portal.

    Builds a query over ``query_log_master`` for rows with a non-null,
    non-empty ``download`` value and ``client='portal-prod'``, lets
    ``add_time_limit`` restrict it using ``today`` and ``lapse``, and runs
    it through ``cartodb_query``.

    Args:
        lapse: period forwarded to ``add_time_limit``.
        today: reference date forwarded to ``add_time_limit``.

    Returns:
        The result of ``cartodb_query`` for the time-limited query.
    """
    base_query = (
        "select * from query_log_master"
        " where download is not null and download !=''"
        " and client='portal-prod'"  # only production portal downloads
    )
    limited_query = add_time_limit(query=base_query, today=today, lapse=lapse)
    return cartodb_query(limited_query)
コード例 #3
0
def get_inst_col(url):
    """Look up the institution code and collection name for a resource URL.

    Queries ``resource_staging`` for the ``icode`` of the row whose ``url``
    matches, trying up to three times while the query returns no rows.

    Args:
        url: resource URL; the collection is the text following ``'?r='``.

    Returns:
        ``(icode, collection)`` taken from the first returned row and from
        the URL respectively, or ``(None, None)`` if every attempt came
        back empty.

    Raises:
        IndexError: a row was found but ``url`` does not contain ``'?r='``.
    """
    # Double any single quote so a URL containing an apostrophe stays inside
    # the SQL string literal instead of terminating it.
    escaped_url = url.replace("'", "''")
    query = "select icode from resource_staging where url='{0}'".format(escaped_url)

    max_retries = 3
    for _ in range(max_retries):
        d = cartodb_query(query)
        if d:
            inst = d[0]['icode']
            col = url.split('?r=')[1]
            return inst, col
    return None, None
コード例 #4
0
ファイル: RepoChecker.py プロジェクト: VertNet/usagestats
    def get_all_repos(self):
        """Return the GitHub (orgname, reponame) pairs listed in CartoDB.

        Reads ``github_orgname`` and ``github_reponame`` from the rows of
        ``resource_staging`` where ``ipt`` is true and ``networks`` contains
        ``VertNet``, logs how many rows came back, and returns them as a
        list of 2-tuples in the order ``cartodb_query`` returned them.
        """
        # The backslash continuations sit inside the literal, so the leading
        # spaces of the following lines are part of the query text.
        query = "select github_orgname, github_reponame\
                 from resource_staging\
                 where ipt is true and networks like '%VertNet%';"

        rows = cartodb_query(query)

        message = "Got {0} repos currently in CartoDB".format(len(rows))
        logging.info(message)

        return [(row['github_orgname'], row['github_reponame'])
                for row in rows]
コード例 #5
0
ファイル: DatasetsSetup.py プロジェクト: VertNet/usagestats
    def post(self):
        """Create or overwrite a Dataset entity for each resource row.

        Selects the dataset columns from ``resource_staging`` (rows with
        ``ipt=true`` whose ``networks`` contains ``VertNet``), builds one
        ``Dataset`` per row using ``gbifdatasetid`` as its id and every
        selected column as a keyword argument, stores them with
        ``ndb.put_multi`` and writes a JSON object with the number of keys
        returned to the response.
        """
        # Fetch deadline of 60 for the calls that follow.
        urlfetch.set_default_fetch_deadline(60)
        self.response.headers['Content-Type'] = 'application/json'

        columns = ("gbifdatasetid, icode, orgname, github_orgname, "
                   "source_url, github_reponame, url, gbifpublisherid ")
        q = ("select " + columns +
             "from resource_staging "
             "where ipt=true and networks like '%VertNet%'")

        datasets = [
            Dataset(id=row['gbifdatasetid'], **row)
            for row in cartodb_query(q)
        ]
        stored_keys = ndb.put_multi(datasets)

        summary = {"datasets processed": len(stored_keys)}
        self.response.write(json.dumps(summary))
コード例 #6
0
ファイル: GetEvents.py プロジェクト: VertNet/usagestats
    def get_events(self):
        """Build the query for this event type and load the matching rows.

        Reads ``self.t`` (event type), ``self.table_name`` and
        ``self.period``. On success the rows are stored in ``self.data``.

        Returns:
            0 when the rows were retrieved; 1 when ``cartodb_query`` raised
            ``ApiQueryMaxRetriesExceededError``, in which case a 504 is set
            and a JSON error object is written to the response.
        """
        logging.info("Building %s query" % self.t)

        # SELECT/FROM part shared by both event types; the table name is
        # substituted once the WHERE part has been appended.
        select_part = ("SELECT cartodb_id, lat, lon, created_at, "
                       "query AS query_terms, response_records, "
                       "results_by_resource "
                       "FROM %s ")

        # In both variants the octet_length condition keeps overly large
        # queries out of the result.
        if self.t == 'download':
            where_part = ("WHERE type='download' "
                          "AND octet_length(query)<=1500 "
                          "AND download IS NOT NULL "
                          "AND download !=''")
        else:
            where_part = ("WHERE left(type, 5)='query' "
                          "AND octet_length(query)<=1500 "
                          "AND results_by_resource IS NOT NULL "
                          "AND results_by_resource != '{}' "
                          "AND results_by_resource !=''")

        query = (select_part + where_part) % self.table_name

        # Only events logged by the production portal
        query += " and client='portal-prod'"

        # The time restriction is applied only for the default table
        if self.table_name == CDB_TABLE:
            year = int(self.period[:4])
            month = int(self.period[-2:])
            # First day of the period plus 32 days always lands in the
            # following month; that date is handed over as "today".
            queried_date = datetime(year, month, 1) + timedelta(days=32)
            query = add_time_limit(query=query, today=queried_date)

        logging.info("Executing query")
        logging.info(query)
        try:
            rows = cartodb_query(query)
        except ApiQueryMaxRetriesExceededError:
            self.error(504)
            resp = {
                "status": "error",
                "message": "Could not retrieve data from CartoDB",
                "data": {
                    "period": self.period,
                    "event_type": self.t
                }
            }
            self.response.write(json.dumps(resp) + "\n")
            return 1

        # Keep the rows on the instance
        self.data = rows

        logging.info("Extracted %d %s events" % (len(rows), self.t))
        return 0