Example #1
0
    def check_failed_repos(self):
        """Check repository watchers on GitHub.

        For every (orgname, reponame) pair returned by get_all_repos,
        fetch the repository's subscriber list from the GitHub API and
        record a failure marker in self.failed_repos when self.watcher
        is not among the subscribers. Pairs missing either part are
        recorded directly as failed.
        """

        # Get all of the repositories from Carto
        all_repos = self.get_all_repos()
        repos = {}
        headers = {
            'User-Agent': 'VertNet',
            'Accept': 'application/vnd.github.v3+json',
            'Authorization': 'token {0}'.format(apikey('ghb'))
        }

        # Launch one asynchronous urlfetch RPC per repository
        for repo in all_repos:
            orgname = repo[0]
            reponame = repo[1]

            # Incomplete pairs cannot be checked; mark as failed and skip
            if orgname is None or reponame is None:
                self.failed_repos.append(repo)
                continue

            # API URL https://api.github.com/repos/[orgname]/[reponame]/subscribers
            rpc = urlfetch.create_rpc()
            url = '/'.join([GH_URL, 'repos', orgname, reponame, 'subscribers'])
            urlfetch.set_default_fetch_deadline(60)
            urlfetch.make_fetch_call(rpc, url, headers=headers)

            repos[repo] = rpc

            # Wait 0.1 second to avoid GitHub abuse triggers
            time.sleep(0.1)

        # Collect the RPC results and look for the expected watcher
        for repo in repos:
            rpc = repos[repo]
            result = rpc.get_result()
            content = json.loads(result.content)
            s = 'Version: %s' % __version__
            s += '\nGot {0} watchers for {1}'.format(len(content), repo[0])
            logging.info(s)
            # A list comprehension always yields a list, so the former
            # 'watcher_list is None' guard was dead code and is removed.
            watcher_list = [x['login'] for x in content]
            if self.watcher not in watcher_list:
                # orgname/reponame are guaranteed non-None here: pairs
                # with a missing part were filtered out with 'continue'
                # above, so the repository URL is always buildable.
                self.failed_repos.append(
                    'http://github.com/%s/%s/' % (repo[0], repo[1]))
        return
Example #2
0
def store_models(models, key, testing=False):
    """Store each model as a JSON document in a GitHub repository.

    Args:
        models: dict mapping a model name to its data dict; each value
            must provide 'report_month', 'github_org' and 'github_repo'.
        key: GitHub API token used for authentication (overridden in the
            non-testing branch below until the repo moves to VertNet).
        testing: if True, write to the testing org/repo.

    Side effects: PUTs one file per model via the GitHub contents API and
    records every request URL in the local modelURLs.json file.
    """

    # Load previously stored model URLs, or start fresh if file is missing
    try:
        with open(model_url_path, 'r') as f:
            model_urls = json.loads(f.read().rstrip())
    except IOError:
        model_urls = {}

    if testing is True:
        org = 'jotegui'
        repo = 'statReports'
    else:  # TODO: Update this block
        from util import apikey  # Remove when repo changed to VertNet
        key = apikey(True)  # Remove when repo changed to VertNet
        org = 'jotegui'  # Change to VertNet org
        repo = 'statReports'  # Change to VertNet repo

    for model in models:

        message = 'Putting JSON data on {0} for {1}, {2}'.format(models[model]['report_month'],
                                                                 models[model]['github_org'],
                                                                 models[model]['github_repo'])
        # Fixed: the GitHub contents API field is spelled 'committer';
        # the former 'commiter' key was silently ignored by the API, so
        # commits were attributed to the authenticated user instead.
        committer = {'name': 'VertNet', 'email': '*****@*****.**'}
        content = base64.b64encode(json.dumps(models[model]))
        path = 'data/{0}_{1}.json'.format(model.replace(' ', '_'), models[model]['report_month'].replace('/', '_'))

        headers = {'User-Agent': 'VertNet', 'Authorization': 'token {0}'.format(key)}
        request_url = 'https://api.github.com/repos/{0}/{1}/contents/{2}'.format(org, repo, path)
        json_input = json.dumps({"message": message, "committer": committer, "content": content})

        r = requests.put(request_url, data=json_input, headers=headers)
        status_code = r.status_code

        if status_code == 201:
            logging.info('SUCCESS - Data model stored for resource {0}'.format(repo))
        else:
            logging.error('DATA MODEL CREATION FAILED for resource {0}'.format(repo))
        time.sleep(2)  # Wait 2 seconds between inserts to avoid 409

        if model not in model_urls:
            model_urls[model] = [request_url]
        else:
            model_urls[model].append(request_url)

    # Store urls on the generated models
    with open(model_url_path, 'w') as f:
        f.write(json.dumps(model_urls))
    logging.info('MODEL URLs stored in local file modelURLs.json')

    return
def load_previous_model(model):
    """Populate the 'year' and 'history' aggregates on the given model.

    If the model has no previous report ('last_report_url' is empty),
    seed the initial values from files; otherwise fetch last month's
    JSON document from GitHub (up to 5 retries, 5 seconds apart) and
    copy its aggregates over. On total failure, zero aggregates are set.

    Returns:
        The updated model dict.
    """
    key = apikey(testing=False)

    # If it's the first time, take 2013 and 2014/01-03 values from files
    if model['last_report_url'] == "":
        model = add_initial_year(model, month="03")
        model = add_initial_year(model, month="02")
        model = add_initial_year(model, month="01")
        model = add_initial_history(model)

    # Else, take values from last month's json
    else:
        retries = 5
        cont = 0
        success = False
        url = model['last_report_url']
        headers = {'User-Agent': 'VertNet', 'Authorization': 'token {0}'.format(key)}

        while cont < retries:
            r = requests.get(url, headers=headers)

            if r.status_code == 200:

                # Response body is JSON whose 'content' field is the
                # base64-encoded previous report document
                prev_model = json.loads(base64.b64decode(json.loads(r.content)['content']))
                model['year'] = prev_model['year']
                model['history'] = prev_model['history']
                success = True
                break
            else:
                cont += 1
                # Fixed: the status code was never interpolated, r.content
                # is a raw string (indexing it with 'message' raised
                # TypeError), and the .format() arg was attached to a
                # message that has no placeholder.
                logging.warning("Attempt failed with status {0}".format(r.status_code))
                logging.warning(r.content)
                logging.warning("Will retry in 5 seconds")
                time.sleep(5)

        if success is False:
            logging.error("Something went wrong retrieving past data for {0} in {1}".format(model["url"], url))
            model['year'] = {"downloads": 0, "records": 0, "downloads_period": 0, "records_period": 0}
            model['history'] = {"downloads": 0, "records": 0, "downloads_period": 0, "records_period": 0}

    return model
Example #4
0
    def check_failed_repos(self):
        """Check repository name consistency between Carto and GitHub.

        For each (orgname, reponame) pair from Carto, list the org's
        repositories on GitHub and record the pair in self.failed_repos
        when the repository name is not found there, or when the pair
        is incomplete.
        """

        all_repos = self.get_all_repos()
        repos = {}
        headers = {
            'User-Agent': 'VertNet',
            'Accept': 'application/vnd.github.v3+json',
            'Authorization': 'token {0}'.format(apikey('ghb'))
        }

        # Launch one asynchronous org-repository-listing RPC per pair
        for repo in all_repos:
            orgname = repo[0]
            reponame = repo[1]

            # Incomplete pairs cannot be checked; mark as failed and skip
            if orgname is None or reponame is None:
                self.failed_repos.append(repo)
                continue

            rpc = urlfetch.create_rpc()
            # per_page=100 keeps orgs with many repositories on one page
            url = '/'.join([GH_URL, 'orgs', orgname, 'repos?per_page=100'])
            urlfetch.set_default_fetch_deadline(60)
            urlfetch.make_fetch_call(rpc, url, headers=headers)

            repos[repo] = rpc

        # Collect results and verify each repository name is listed
        for repo in repos:
            rpc = repos[repo]
            result = rpc.get_result()
            content = json.loads(result.content)
            s = 'Version: %s' % __version__
            s += '\nGot {0} repos for {1}'.format(len(content), repo[0])
            logging.info(s)
            # A list comprehension always yields a list, so the former
            # 'repo_list is None' guard was dead code and is removed.
            repo_list = [x['name'] for x in content]
            if repo[1] not in repo_list:
                self.failed_repos.append(repo)

        return
Example #5
0
    def check_failed_repos(self):
        """Check repository name consistency between CartoDB and GitHub."""

        pairs = self.get_all_repos()
        pending = {}
        headers = {
            'User-Agent': 'VertNet',
            'Accept': 'application/vnd.github.v3+json',
            'Authorization': 'token {0}'.format(apikey('ghb'))
        }

        # Fire one asynchronous listing call per (org, repo) pair
        for pair in pairs:
            org = pair[0]
            name = pair[1]

            # A pair missing either part cannot be checked at all
            if org is None or name is None:
                self.failed_repos.append(pair)
                continue

            listing_url = '/'.join([GH_URL, 'orgs', org, 'repos'])
            call = urlfetch.create_rpc()
            urlfetch.set_default_fetch_deadline(60)
            urlfetch.make_fetch_call(call, listing_url, headers=headers)

            pending[pair] = call

        # Resolve each call and flag pairs whose repo name is missing
        for pair in pending:
            payload = json.loads(pending[pair].get_result().content)
            logging.info("Got {0} repos for {1}".format(len(payload), pair[0]))
            names = [entry['name'] for entry in payload]
            if names is None or pair[1] not in names:
                self.failed_repos.append(pair)

        return
Example #6
0
    def send_issue(self, report_entity):
        """Create a GitHub issue announcing a monthly data use report.

        Builds the issue title and body from the Report entity's dataset
        and period, POSTs it to the GitHub issues API, and on success
        sets the entity's 'issue_sent' flag before re-storing it.
        """

        report_key = report_entity.key
        logging.info("Ready to send issue to %s" % report_key.id())

        gbifdatasetid = report_entity.reported_resource.id()
        logging.info("Sending issue for dataset {0}".format(gbifdatasetid))

        # Build variables
        dataset_key = report_entity.reported_resource
        period_key = report_entity.reported_period
        dataset_entity, period_entity = ndb.get_multi([dataset_key,
                                                       period_key])

        # Check that dataset exists
        if not dataset_entity:
            self.error(500)
            resp = {
                "status": "error",
                "message": "Missing dataset in datastore."
                           " Please run /setup_datasets to fix",
                "data": {
                    # NOTE(review): dataset_key is a datastore key, not a
                    # plain string — json.dumps below may fail on it;
                    # confirm (later revisions use gbifdatasetid here).
                    "missing_dataset_key": dataset_key
                }
            }
            logging.error(resp)
            self.response.write(json.dumps(resp)+"\n")
            return

        # GitHub stuff: target repository taken from the dataset entity
        org = dataset_entity.github_orgname
        repo = dataset_entity.github_reponame
        logging.info(org)
        logging.info(repo)
        key = apikey('ghb')
        user_agent = 'VertNet'

        # Testing block: redirect the issue to a sandbox repository
        if self.testing:
            logging.info("Using testing repositories in jotegui")
            org = 'jotegui'
            repo = 'statReports'
            user_agent = 'jotegui'
            key = apikey('jot')

        # GitHub request headers
        headers = {
            'User-Agent': user_agent,
            'Authorization': 'token {0}'.format(key),
            "Accept": "application/vnd.github.v3+json"
        }

        # Issue creation, only if issue not previously created
        if report_entity.issue_sent is False:

            # Links to the rendered report and to the full report list
            link = "http://" + MODULE + "/reports/" + gbifdatasetid + \
                    "/" + self.period + "/"
            link_all = "http://" + MODULE + "/reports/" + gbifdatasetid + "/"

            title = 'Monthly VertNet data use report for %s-%s, resource %s' \
                    % (period_entity.year,
                       period_entity.month,
                       dataset_entity.ccode)
            body = """Your monthly VertNet data use report is ready!
You can see the HTML rendered version of the reports with this link:

{0}

Raw text and JSON-formatted versions of the report are also available for
download from this link. In addition, a copy of the text version has been
uploaded to your GitHub repository, under the "Reports" folder. Also, a full
list of all reports can be accessed here:

{1}

You can find more information on the reporting system, along with an
explanation of each metric, here:

http://www.vertnet.org/resources/usagereportingguide.html

Please post any comments or questions to:
http://www.vertnet.org/feedback/contact.html

Thank you for being a part of VertNet.
""".format(link, link_all)
            labels = ['report']

            request_url = '{0}/{1}/{2}/issues'.format(GH_REPOS, org, repo)
            json_input = json.dumps({
                'title': title,
                'body': body,
                'labels': labels
            })

            # Make GitHub call
            r = urlfetch.fetch(
                url=request_url,
                method=urlfetch.POST,
                headers=headers,
                payload=json_input
            )

            # Check output
            logging.info(r.status_code)

            # HTTP 201 = Success
            if r.status_code == 201:
                logging.info("Issue %s successfully sent" % report_key.id())
                report_entity.issue_sent = True
            # Other generic problems
            else:
                logging.error("Issue %s couldn't be sent" % report_key.id())
                logging.error(r.content)
                resp = {
                    "status": "failed",
                    "message": "Got uncaught error code when uploading"
                               " report to GitHub. Aborting issue creation.",
                    "source": "send_to_github",
                    "data": {
                        "report_key": report_key,
                        "period": self.period,
                        "testing": self.testing,
                        "error_code": r.status_code,
                        "error_content": r.content
                    }
                }
                logging.error(resp)
                # Early return: the entity is NOT re-put, so this issue
                # remains eligible to be retried on a later run.
                return

        # This 'else' should NEVER happen
        else:
            logging.warning("Issue for %s was already sent. This call"
                            " shouldn't have happened" % report_key.id())

        # Store updated version of Report entity
        report_entity.put()

        # Wait 2 seconds to avoid GitHub abuse triggers
        time.sleep(2)

        return
Example #7
0
    def send_issue(self, report_entity):
        """Create a GitHub issue announcing a monthly data use report.

        Builds the issue title and body from the Report entity's dataset
        and period, POSTs it to the GitHub issues API, and on success
        sets the entity's 'issue_sent' flag before re-storing it. When
        the referenced dataset is missing, 'issue_sent' is forced to True
        so the task queue does not loop forever.
        """

        report_key = report_entity.key
        gbifdatasetid = report_entity.reported_resource.id()
        s =  "Version: %s\n" % __version__
        s += "Storing issue for dataset %s" % gbifdatasetid
        logging.info(s)

        # Build variables
        dataset_key = report_entity.reported_resource
        period_key = report_entity.reported_period
        dataset_entity, period_entity = ndb.get_multi([dataset_key, period_key])

        # Check that dataset exists
        if not dataset_entity:
            self.error(500)
            resp = {
                "status": "error",
                "message": "Missing dataset in datastore. Please run /setup_datasets "
                           "or remove associated Period entity from data store to fix.",
                "data": {
                    "missing_dataset_key": gbifdatasetid
                }
            }
            s =  "Version: %s\n" % __version__
            s += "Response: %s" % resp
            logging.error(s)
            self.response.write(json.dumps(resp)+"\n")

            # Set 'issue_sent' to True to avoid endless loop in the case a dataset does
            # not exist in the datastore.
            # TODO: Better if the Report entity had a flag for 'issue_skipped'
            # with default None. But, for now...
            report_entity.issue_sent = True

            # Store updated version of Report entity
            report_entity.put()

            return

        # GitHub stuff: target repository taken from the dataset entity
        org = dataset_entity.github_orgname
        repo = dataset_entity.github_reponame
        user_agent = 'VertNet'
        key = apikey('ghb')

        # Testing block: redirect the issue to the sandbox repository
        if self.testing:
            org = 'VertNet'
            repo = 'statReports'
            user_agent = 'VertNet'
            key = apikey('ghb')

        s =  "Version: %s\n" % __version__
        s += "Using GitHub repository %s/%s " % (org, repo)
        s += "as user_agent %s" % user_agent
        logging.info(s)

        # GitHub request headers
        headers = {
            'User-Agent': user_agent,
            'Authorization': 'token {0}'.format(key),
            "Accept": "application/vnd.github.v3+json"
        }

        # Issue creation, only if issue not previously created
        if report_entity.issue_sent == False:
            # Links to the report list, the rendered report, and the
            # GitHub folder holding the text copies
            link_all = "http://%s/reports/%s/" % (MODULE, gbifdatasetid)
            link = "http://%s/reports/%s/%s/" % (MODULE, gbifdatasetid, self.period)
            link_gh = "https://github.com/%s/%s/tree/master/reports" % (org, repo)
            title = 'Monthly VertNet data use report for %s-%s, resource %s' \
                    % (period_entity.year,
                       period_entity.month,
                       dataset_entity.ccode)
            body = """Your monthly VertNet data use report is ready!

You can see the HTML rendered version of this report at:

{0}

Raw text and JSON-formatted versions of the report are also available for
download from this link. 

A copy of the text version has also been uploaded to your GitHub 
repository under the "reports" folder at:

{1}

A full list of all available reports can be accessed from:

{2}

You can find more information on the reporting system, along with an
explanation of each metric, at:

http://www.vertnet.org/resources/usagereportingguide.html

Please post any comments or questions to:
http://www.vertnet.org/feedback/contact.html

Thank you for being a part of VertNet.
""".format(link, link_gh, link_all)

            labels = ['report']
            request_url = '{0}/{1}/{2}/issues'.format(GH_REPOS, org, repo)
            json_input = json.dumps({
                'title': title,
                'body': body,
                'labels': labels
            })

            # Make GitHub call
            r = urlfetch.fetch(
                url=request_url,
                method=urlfetch.POST,
                headers=headers,
                payload=json_input
            )

            # Check output
            # HTTP 201 = Success
            if r.status_code == 201:
                s =  "Version: %s\n" % __version__
                s += "Status: %s. Issue %s sent." % (r.status_code, report_key.id())
                logging.info(s)
                report_entity.issue_sent = True
            # Other generic problems
            else:
                resp = {
                    "status": "failed",
                    "message": "Got uncaught error code when uploading"
                               " report to GitHub. Aborting issue creation.",
                    "source": "send_to_github",
                    "data": {
                        "report_key": report_key,
                        "period": self.period,
                        "testing": self.testing,
                        "error_code": r.status_code,
                        "error_content": r.content
                    }
                }
                s =  "Version: %s\n" % __version__
                s += "Response: %s. " % resp
                logging.error(s)
                # Early return: the entity is NOT re-put, so this issue
                # remains eligible to be retried on a later run.
                return

        # This 'else' should NEVER happen
        else:
            s =  "Version: %s\n" % __version__
            s += "Issue for %s was already sent, " % report_key.id()
            s += "but 'issue_sent' property was 'False'. "
            s += "This call should not have happened."
            logging.error(s)

        # Store updated version of Report entity
        report_entity.put()

        # Wait 2 seconds to avoid GitHub abuse triggers, 1 isn't sufficient
        time.sleep(2)

        return
Example #8
0
    def store_report(self, report_entity):
        """Upload the text version of a monthly report to GitHub.

        Renders the 'report.txt' template with the Report entity's
        dataset and period, PUTs it to the GitHub contents API under the
        repository's 'reports/' folder, and on success sets the entity's
        'stored' flag before re-storing it.
        """

        report_key = report_entity.key
        logging.info("Ready to store %s" % report_key.id())

        gbifdatasetid = report_entity.reported_resource.id()
        logging.info("Storing report for dataset {0}".format(gbifdatasetid))

        # Build variables
        dataset_key = report_entity.reported_resource
        period_key = report_entity.reported_period
        dataset_entity, period_entity = ndb.get_multi([dataset_key,
                                                       period_key])

        # Check that dataset exists
        if not dataset_entity:
            self.error(500)
            resp = {
                "status": "error",
                "message": "Missing dataset in datastore."
                           " Please run /setup_datasets to fix",
                "data": {
                    # NOTE(review): dataset_key is a datastore key, not a
                    # plain string — json.dumps below may fail on it;
                    # confirm (later revisions use gbifdatasetid here).
                    "missing_dataset_key": dataset_key
                }
            }
            logging.error(resp)
            self.response.write(json.dumps(resp)+"\n")
            return

        # GitHub stuff: target repository taken from the dataset entity
        org = dataset_entity.github_orgname
        repo = dataset_entity.github_reponame
        logging.info(org)
        logging.info(repo)
        key = apikey('ghb')
        user_agent = 'VertNet'

        # Testing block: redirect the upload to a sandbox repository
        if self.testing:
            logging.info("Using testing repositories in jotegui")
            org = 'jotegui'
            repo = 'statReports'
            user_agent = 'jotegui'
            key = apikey('jot')

        # GitHub request headers
        headers = {
            'User-Agent': user_agent,
            'Authorization': 'token {0}'.format(key),
            "Accept": "application/vnd.github.v3+json"
        }

        # Upload txt report to GitHub, only if not previously stored
        if report_entity.stored is False:

            # Load template
            template = JINJA_ENVIRONMENT.get_template('report.txt')

            # Render template with values from Report
            content = template.render(
                dataset=dataset_entity,
                report=report_entity,
                period=period_entity
            )

            # Build GitHub request parameters: message
            message = content.split("\n")[1]  # 2nd line of txt report

            # Build GitHub request parameters: committer
            committer = GH_COMMITTER

            # Build GitHub request parameters: content
            # (contents API requires the file payload base64-encoded)
            content_enc = base64.b64encode(content.encode('utf-8'))

            # Build GitHub request parameters
            json_input = json.dumps({
                "message": message,
                "committer": committer,
                "content": content_enc
            })

            # Build GitHub request URL: path, e.g.
            # reports/ICODE-CCODE-YYYY-MM.txt
            txt_path = "-".join([dataset_entity.icode,
                                dataset_entity.ccode,
                                "-".join([self.period[:4], self.period[4:]])])
            path = "reports/{0}.txt".format(txt_path)
            logging.info(path)

            # Build GitHub request URL
            request_url = '{0}/{1}/{2}/contents/{3}'.format(GH_REPOS,
                                                            org, repo, path)
            logging.info(request_url)

            # Make GitHub call
            r = urlfetch.fetch(
                url=request_url,
                method=urlfetch.PUT,
                headers=headers,
                payload=json_input
            )

            # Check output
            logging.info(r.status_code)

            # HTTP 201 = Success
            if r.status_code == 201:
                logging.info("Report %s successfully stored" % report_key.id())
                report_entity.stored = True
            # HTTP 422 = 'SHA' missing, meaning report was already there
            elif r.status_code == 422:
                logging.warning("Report %s was already stored, but 'stored'"
                                " property was stored as 'False'. This call"
                                " shouldn't have happened" % report_key.id())
                logging.error(r.content)
                report_entity.stored = True
            # Other generic problems
            else:
                logging.error("Report %s couldn't be stored" % report_key.id())
                logging.error(r.content)
                resp = {
                    "status": "failed",
                    "message": "Got uncaught error code when uploading"
                               " report to GitHub. Aborting issue creation.",
                    "source": "send_to_github",
                    "data": {
                        "report_key": report_key,
                        "period": self.period,
                        "testing": self.testing,
                        "error_code": r.status_code,
                        "error_content": r.content
                    }
                }
                logging.error(resp)
                # Early return: the entity is NOT re-put, so this upload
                # remains eligible to be retried on a later run.
                return

        # This 'else' should NEVER happen
        else:
            logging.warning("Report %s was already stored. This call"
                            " shouldn't have happened" % report_key.id())

        # Store updated version of Report entity
        report_entity.put()

        # Wait 2 seconds to avoid GitHub abuse triggers
        time.sleep(2)

        return
import requests
import json
import logging

from util import apikey

__author__ = '@jotegui'


# Base endpoints: GitHub API root and the VertNet CartoDB SQL API
ghb_url = 'https://api.github.com'
cdb_url = "https://vertnet.cartodb.com/api/v2/sql"
# Selects which API key util.apikey returns; False — presumably the
# production key (TODO confirm against util.apikey's signature)
testing = False
key = apikey(testing)
# Default headers for every GitHub request made by this module
headers = {
    'User-Agent': 'VertNet',  # Authenticate as VertNet
    'Accept': 'application/vnd.github.v3+json',  # Require version 3 of the API (for stability)
    'Authorization': 'token {0}'.format(key)  # Provide the API key
}


class ConsistencyError(Exception):
    """Error raised when a consistency check fails.

    Wraps the offending value so callers can inspect what was found
    to be inconsistent.
    """

    def __init__(self, value):
        # Fixed: the original used tab indentation in a space-indented
        # file, which is fragile in Python; normalized to 4 spaces.
        self.value = value

    def __str__(self):
        return repr(self.value)


def get_all_repos():
    """Extract a list of all github_orgnames and github_reponames from CartoDB."""
    query = "select github_orgname, github_reponame from resource_staging where ipt is true and networks like '%VertNet%';"
    params = {'q':query}
Example #10
0
    def store_report(self, report_entity):
        """Write the text version of a monthly report to GitHub.

        Renders the 'report.txt' template with the Report entity's
        dataset and period, PUTs it to the GitHub contents API under the
        repository's 'reports/' folder, and sets the entity's 'stored'
        flag on success (or on HTTP 422, which means the file already
        exists) before re-storing it. When the referenced dataset is
        missing, 'stored' is forced to True so the task does not loop.
        """

        # Build variables
        dataset_key = report_entity.reported_resource
        period_key = report_entity.reported_period
        dataset_entity, period_entity = ndb.get_multi(
            [dataset_key, period_key])
        report_key = report_entity.key
        gbifdatasetid = report_entity.reported_resource.id()

        # Check that dataset exists
        if not dataset_entity:
            self.error(500)
            resp = {
                "status":
                "error",
                "message":
                "Missing dataset in datastore. Please run /setup_datasets "
                "or remove associated Period entity from data store to fix.",
                "data": {
                    "missing_dataset_key": gbifdatasetid
                }
            }
            s = "Version: %s\n" % __version__
            s += "Response: %s" % resp
            logging.error(s)
            self.response.write(json.dumps(resp) + "\n")

            # Set 'stored' to True to avoid endless loop in the case a dataset does
            # not exist in the datastore.
            # TODO: Better if the Report entity had a flag for 'storage_skipped'
            # with default None. But, for now...
            report_entity.stored = True

            # Store updated version of Report entity
            report_entity.put()

            return

        # GitHub stuff: target repository taken from the dataset entity
        org = dataset_entity.github_orgname
        repo = dataset_entity.github_reponame
        user_agent = 'VertNet'
        key = apikey('ghb')

        # Testing block: redirect the upload to the sandbox repository
        if self.testing:
            org = 'VertNet'
            repo = 'statReports'
            user_agent = 'VertNet'
            key = apikey('ghb')

        # GitHub request headers
        headers = {
            'User-Agent': user_agent,
            'Authorization': 'token {0}'.format(key),
            "Accept": "application/vnd.github.v3+json"
        }

        # Load template
        template = JINJA_ENVIRONMENT.get_template('report.txt')

        # Render template with values from Report
        content = template.render(dataset=dataset_entity,
                                  report=report_entity,
                                  period=period_entity)

        # Build GitHub request parameters: message
        message = content.split("\n")[1]  # 2nd line of txt report

        # Build GitHub request parameters: committer
        committer = GH_COMMITTER

        # Build GitHub request parameters: content
        # (contents API requires the file payload base64-encoded)
        content_enc = base64.b64encode(content.encode('utf-8'))

        # Build GitHub request parameters
        json_input = json.dumps({
            "message": message,
            "committer": committer,
            "content": content_enc
        })

        # Build GitHub request URL: path, e.g. reports/ICODE-CCODE-YYYY-MM.txt
        txt_path = "-".join([
            dataset_entity.icode, dataset_entity.ccode,
            "-".join([self.period[:4], self.period[4:]])
        ])
        path = "reports/{0}.txt".format(txt_path)

        s = "Storing at: %s/%s/%s/%s" % (GH_REPOS, org, repo, path)
        logging.info(s)

        # Build GitHub request URL
        request_url = '{0}/{1}/{2}/contents/{3}'.format(
            GH_REPOS, org, repo, path)

        # Make GitHub call
        r = urlfetch.fetch(url=request_url,
                           method=urlfetch.PUT,
                           headers=headers,
                           payload=json_input)

        # Check output
        # HTTP 201 = Success
        if r.status_code == 201:
            report_entity.stored = True

            s = "Report %s sent " % report_key.id()
            s += "for gbifdatasetid %s to %s " % (gbifdatasetid, path)
            logging.info(s)

        # HTTP 422 = 'SHA' missing, meaning report was already there
        elif r.status_code == 422:
            report_entity.stored = True

            s = "Report %s was already stored " % report_key.id()
            s += "for gbifdatasetid %s at %s " % (gbifdatasetid, path)
            logging.warning(s)

        # Other generic problems
        else:
            resp = {
                "status": "failed",
                "message":
                "Got uncaught error code when uploading report to GitHub.",
                "source": "send_to_github",
                "data": {
                    "report_key": report_key,
                    "period": self.period,
                    "gbifdatasetid": gbifdatasetid,
                    "github_issue": self.github_issue,
                    "testing": self.testing,
                    "error_code": r.status_code,
                    "error_content": r.content
                }
            }

            s = "Version: %s\n" % __version__
            # Fixed: '"Response: " % resp' raised TypeError ("not all
            # arguments converted") — the conversion specifier was missing.
            s += "Response: %s" % resp
            logging.error(s)

            # Early return: the entity is NOT re-put, so this upload
            # remains eligible to be retried on a later run.
            return

        # Store updated version of Report entity
        report_entity.put()

        # Wait 2 seconds to avoid GitHub abuse triggers. 1 isn't sufficient.
        time.sleep(2)

        return