    def test_now(self):
        # Format the current local time, then parse it back to a timestamp.
        s = strict_rfc3339.now_to_rfc3339_localoffset()
        w = strict_rfc3339.rfc3339_to_timestamp(s)
        # Assumes the test machine is in US Eastern time: the offset is
        # -05:00 in standard time and -04:00 while DST is in effect.
        assert s[-6:] == ["-05:00", "-04:00"][time.localtime(w).tm_isdst]

        # The default output has whole-second precision, so the parsed value
        # may trail time.time() by up to one second.
        d = int(time.time()) - w
        assert d == 0 or d == 1

        # Passing False keeps sub-second precision in the string.
        s = strict_rfc3339.now_to_rfc3339_localoffset(False)
        assert abs(strict_rfc3339.rfc3339_to_timestamp(s) - time.time()) <= 0.1
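The round trip this test exercises is easy to reproduce outside a test harness. A minimal sketch using strict_rfc3339's public helpers (now_to_rfc3339_localoffset, rfc3339_to_timestamp and timestamp_to_rfc3339_utcoffset are the library's real names; the surrounding script is illustrative):

import strict_rfc3339

# Current time as an RFC 3339 string with the machine's local UTC offset.
local = strict_rfc3339.now_to_rfc3339_localoffset()

# Parse any valid RFC 3339 string back into a Unix timestamp.
ts = strict_rfc3339.rfc3339_to_timestamp(local)

# Re-format the same instant in UTC ("Z") notation instead.
utc = strict_rfc3339.timestamp_to_rfc3339_utcoffset(ts)

print(local)  # e.g. 2017-03-04T10:00:00-05:00
print(utc)    # e.g. 2017-03-04T15:00:00Z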
Example #3
    def process(self, element):
        logging.info('CountAverages start: %s %r' % (type(element), element))
        stat_names = ["temperature",
                      "pressure",
                      "humidity",
                      "windgen",
                      "solargen"]

        avg_e = {}
        aggr = {}
        for k in stat_names:
            aggr[k] = (0, 0)

        avg_e['clientid'] = element[0]
        avg_e['timestamp'] = strict_rfc3339.now_to_rfc3339_localoffset()

        # Emit sum and count for each metric
        for elem_map in element[1]:
            for key in stat_names:
                if key in elem_map:
                    value = elem_map[key]
                    aggr[key] = (aggr[key][0] + value, aggr[key][1] + 1)

        # Calculate average and set in return map
        for key, value in aggr.items():
            if value[1] == 0:
                avg_e[key] = 0
            else:
                avg_e[key] = value[0] / value[1]
        logging.info('CountAverages end: {}'.format(avg_e))

        return [avg_e]
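This process() reads like an Apache Beam DoFn applied after a GroupByKey, where each element is a (clientid, iterable-of-readings) pair. A standalone call for illustration, assuming the method belongs to a class named CountAverages (as its log lines suggest); the client id and readings below are invented:

# Hypothetical grouped element: (clientid, iterable of per-reading dicts).
element = ('client-42', [
    {'temperature': 20.0, 'pressure': 1010.0},
    {'temperature': 22.0, 'humidity': 55.0},
])

result = CountAverages().process(element)
print(result)
# [{'clientid': 'client-42', 'timestamp': '<RFC 3339 string>',
#   'temperature': 21.0, 'pressure': 1010.0, 'humidity': 55.0,
#   'windgen': 0, 'solargen': 0}]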
Example #4
    def process(self, element):
        stat_names = ["I", "U", "Tm"]

        avg_e = {}
        aggr = {}
        for k in stat_names:
            aggr[k] = (0, 0)

        avg_e['deviceid'] = element[0]
        avg_e['timestamp'] = strict_rfc3339.now_to_rfc3339_localoffset()

        # Emit sum and count for each metric
        for elem_map in element[1]:
            for key in stat_names:
                if key in elem_map:
                    value = elem_map[key]
                    aggr[key] = (aggr[key][0] + value, aggr[key][1] + 1)

        # Calculate average and set in return map
        for key, value in aggr.items():
            if value[1] == 0:
                avg_e[key] = 0
            else:
                avg_e[key] = value[0] / value[1]
        logging.info('Engine_avr: {}'.format(avg_e))

        return [avg_e]
Example #5
for dataset in data_all:
    ## Skip big lottery for testing
    #if dataset['identifier'] == 'a002400000Z58cqAAB':
    #    continue

    metadata = dataset.get('datagetter_metadata', {})
    dataset['datagetter_metadata'] = metadata

    if dataset['license'] not in acceptable_licenses + unacceptable_licenses:
        raise ValueError('Unrecognised license ' + dataset['license'])

    url = dataset['distribution'][0]['downloadURL']

    if args.download:
        metadata['datetime_downloaded'] = strict_rfc3339.now_to_rfc3339_localoffset()
        try:
            r = requests.get(
                url,
                headers={
                    'User-Agent':
                    'datagetter (https://github.com/ThreeSixtyGiving/datagetter)'
                })
            r.raise_for_status()
        except Exception:
            print("\n\nDownload failed for dataset {}\n".format(
                dataset['identifier']))
            traceback.print_exc()
            exit_status = 1
            metadata['downloads'] = False
        else:
Example #6
def fetch_and_convert(args, dataset):
    r = None

    metadata = dataset.get('datagetter_metadata', {})
    dataset['datagetter_metadata'] = metadata

    if dataset['license'] not in acceptable_licenses + unacceptable_licenses:
        raise ValueError('Unrecognised license ' + dataset['license'])

    url = dataset['distribution'][0]['downloadURL']

    if args.download:
        proxies = None
        metadata['datetime_downloaded'] = strict_rfc3339.now_to_rfc3339_localoffset()
        if args.socks5_proxy:
            proxies = {
                'http': args.socks5_proxy,
                'https': args.socks5_proxy,
            }

        try:
            print("Fetching %s" % url)
            r = requests.get(
                url,
                headers={
                    'User-Agent':
                    'datagetter (https://github.com/ThreeSixtyGiving/datagetter)'
                },
                proxies=proxies)
            r.raise_for_status()

            metadata['downloads'] = True
        except Exception as e:
            if isinstance(e, KeyboardInterrupt):
                raise

            print("\n\nDownload {} failed for dataset {}\n".format(
                url, dataset['identifier']))
            traceback.print_exc()
            metadata['downloads'] = False
            metadata['error'] = str(e)

            if not isinstance(e, requests.exceptions.HTTPError):
                return

        content_type = r.headers.get('content-type', '').split(';')[0].lower()
        if content_type and content_type in CONTENT_TYPE_MAP:
            file_type = CONTENT_TYPE_MAP[content_type]
        elif 'content-disposition' in r.headers:
            file_type = rfc6266.parse_requests_response(
                r).filename_unsafe.split('.')[-1]
        else:
            file_type = url.split('.')[-1]
        if file_type not in CONTENT_TYPE_MAP.values():
            print("\n\nUnrecognised file type {}\n".format(file_type))
            return

        # Check that the downloaded json file is valid json and not junk from the webserver
        # e.g. a 500 error being output without the proper status code.
        if file_type == "json":
            try:
                json.loads(r.text)
            except ValueError:
                print("\n\nJSON file provided by webserver is invalid")
                metadata['downloads'] = False
                metadata['error'] = "Invalid JSON file provided by webserver"
                return

        metadata['file_type'] = file_type

        file_name = '{}/original/{}.{}'.format(args.data_dir, dataset['identifier'], file_type)
        with open(file_name, 'wb') as fp:
            fp.write(r.content)
    else:
        # --no-download arg

        # We require the metadata to exist; it won't if the file failed to
        # download correctly.
        if not metadata['downloads']:
            print(
                "Skipping %s as it was not marked as successfully downloaded" %
                dataset['identifier'])
            return

        file_type = metadata['file_type']
        file_name = '{}/original/{}.{}'.format(args.data_dir, dataset['identifier'], file_type)

    json_file_name = '{}/json_all/{}.json'.format(args.data_dir,
                                                  dataset['identifier'])

    metadata['file_size'] = os.path.getsize(file_name)

    if args.convert and (args.convert_big_files
                         or metadata['file_size'] < 10 * 1024 * 1024):
        if file_type == 'json':
            os.link(file_name, json_file_name)
            metadata['json'] = json_file_name
        else:
            try:
                print("Running convert on %s to %s" %
                      (file_name, json_file_name))
                convert_spreadsheet(file_name, json_file_name, file_type)
            except KeyboardInterrupt:
                raise
            except Exception:
                print(
                    "\n\nUnflattening failed for file {}\n".format(file_name))
                traceback.print_exc()
                metadata['json'] = None
                metadata["valid"] = False
                metadata["error"] = "Could not unflatten file"
            else:
                metadata['json'] = json_file_name

    metadata['acceptable_license'] = dataset['license'] in acceptable_licenses

    # We can only do anything with the JSON if it did successfully convert.
    if metadata.get('json'):
        format_checker = FormatChecker()
        if args.validate:
            try:
                with open(json_file_name, 'r') as fp:
                    validate(json.load(fp),
                             schema,
                             format_checker=format_checker)
            except (ValidationError, ValueError):
                metadata['valid'] = False
            else:
                metadata['valid'] = True

        if metadata.get('valid'):
            os.link(
                json_file_name,
                '{}/json_valid/{}.json'.format(args.data_dir,
                                               dataset['identifier']))
            data_valid.append(dataset)
            if metadata['acceptable_license']:
                os.link(
                    json_file_name,
                    '{}/json_acceptable_license_valid/{}.json'.format(
                        args.data_dir, dataset['identifier']))
                data_acceptable_license_valid.append(dataset)

        if metadata['acceptable_license']:
            os.link(
                json_file_name, '{}/json_acceptable_license/{}.json'.format(
                    args.data_dir, dataset['identifier']))
            data_acceptable_license.append(dataset)
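fetch_and_convert() leans on module-level names it does not define: CONTENT_TYPE_MAP, acceptable_licenses, unacceptable_licenses, and the data_valid/data_acceptable_license accumulator lists (plus imports such as requests, rfc6266 and strict_rfc3339). A plausible minimal sketch of those globals, purely to make the snippet's references concrete; the real datagetter project defines its own values:

# Hypothetical stand-ins for the globals the function expects; the values
# here are illustrative, not the datagetter project's actual configuration.
CONTENT_TYPE_MAP = {
    'application/json': 'json',
    'text/csv': 'csv',
    'application/vnd.ms-excel': 'xls',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
}

acceptable_licenses = ['https://creativecommons.org/licenses/by/4.0/']
unacceptable_licenses = []

# Datasets that pass each stage are appended here as side effects.
data_valid = []
data_acceptable_license = []
data_acceptable_license_valid = []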
Example #7
def set_up_db_reading(report_label):
    """Get values for the db connection, make the connection and specify output filenames.

    Args:
        report_label (str): A string used to label the output files (logs and data).

    Returns:
        dict: A dict of various values for the script to make db_connection, name files, etc.

    """
    # Parse command line inputs.
    parser = argparse.ArgumentParser(description='inputs')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='DEBUG-level logging.',
                        required=False)
    parser.add_argument('-a',
                        '--alliance',
                        action='store_true',
                        help='Filenames for AGR export.',
                        required=False)
    parser.add_argument('-c',
                        '--config_file',
                        help='Supply filepath to credentials, optional.',
                        required=False)
    parser.add_argument('-t',
                        '--testing',
                        action='store_true',
                        help='Rollback db writes.',
                        required=False)
    # Use parse_known_args() instead of parse_args() to handle only the args relevant here without crashing.
    # Extra arguments that may be relevant to specific scripts using this module are safely ignored.
    # args = parser.parse_args()
    args, extra_args = parser.parse_known_args()

    # Determine whether script is to run locally or in docker.
    config_file = args.config_file

    # Determine values for key variables.
    if config_file:
        config = configparser.ConfigParser()
        config.read(config_file)
        server = config['default']['Server']
        database = config['default']['Database']
        username = config['default']['User']
        password = config['default']['PGPassword']
        database_release = config['default']['Release']
        assembly = config['default']['Assembly']
        annotation_release = config['default']['AnnotationRelease']
        alliance_schema = config['default']['AllianceSchema']
        alliance_release = config['default']['AllianceRelease']
        svn_username = config['default']['SVNUsername']
        svn_password = config['default']['SVNPassword']
        output_dir = './'
        input_dir = './'
        log_dir = './'
    else:
        server = os.environ['SERVER']
        database = os.environ['DATABASE']
        username = os.environ['USER']
        password = os.environ['PGPASSWORD']
        database_release = os.environ['RELEASE']
        assembly = os.environ['ASSEMBLY']
        annotation_release = os.environ['ANNOTATIONRELEASE']
        alliance_schema = os.environ['ALLIANCESCHEMA']
        alliance_release = os.environ['ALLIANCERELEASE']
        svn_username = os.environ['SVNUSER']
        svn_password = os.environ['SVNPASSWORD']
        output_dir = '/src/output/'
        input_dir = '/src/input/'
        log_dir = '/src/logs/'

    # Send values to a dict.
    set_up_dict = {}
    set_up_dict['server'] = server
    set_up_dict['database'] = database
    set_up_dict['username'] = username
    set_up_dict['password'] = password
    set_up_dict['database_release'] = database_release
    set_up_dict['assembly'] = assembly
    set_up_dict['annotation_release'] = annotation_release
    set_up_dict['alliance_schema'] = alliance_schema
    set_up_dict['alliance_release'] = alliance_release
    set_up_dict['svn_username'] = svn_username
    set_up_dict['svn_password'] = svn_password
    set_up_dict['input_dir'] = input_dir
    set_up_dict['output_dir'] = output_dir

    # Record whether the testing flag is set (True means db writes are rolled back).
    set_up_dict['testing'] = args.testing

    # Output filename
    alliance = args.alliance
    if alliance is True:
        set_up_dict['output_filename'] = output_dir + 'FB_' + alliance_schema + '_' + report_label + '.json'
    else:
        set_up_dict['output_filename'] = output_dir + report_label + '_' + database + '.tsv'

    # Handle logging
    if alliance is True:
        log_filename = output_dir + 'FB_' + alliance_schema + '_' + report_label + '.log'
    else:
        log_filename = log_dir + report_label + '_' + database + '.log'
    file_handler = logging.FileHandler(log_filename, mode='a')
    formatter = logging.Formatter(
        '%(asctime)s : %(levelname)s : Line No %(lineno)d : %(message)s')
    file_handler.setFormatter(formatter)
    log.addHandler(file_handler)
    # Determine log level.
    verbose = args.verbose
    if verbose is True:
        log.setLevel(logging.DEBUG)
    else:
        log.setLevel(logging.INFO)
    sys.stdout = open(log_filename, 'a')
    set_up_dict['log'] = logging.getLogger(__name__)

    # Establish database connection.
    set_up_dict['conn'], conn_description = establish_db_connection(
        server, database, username, password)

    # Official timestamp for this script.
    set_up_dict['the_time'] = strict_rfc3339.now_to_rfc3339_localoffset()

    log.info('Done setting up the environment, db connections and logging.')
    log.info(conn_description)
    if extra_args:
        log.info('These extra arguments were not used by set_up_db_reading(): {}'.format(extra_args))

    return set_up_dict
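A typical caller would look something like this; 'gene_report' is an invented report_label, and the keys read back are the ones the function above populates:

# Hypothetical caller of set_up_db_reading().
set_up = set_up_db_reading('gene_report')

conn = set_up['conn']            # live db connection
the_time = set_up['the_time']    # RFC 3339 timestamp for this run
log = set_up['log']              # logger configured above

log.info('Writing output to {}'.format(set_up['output_filename']))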
Example #8
def now():
    """Return the current time as an RFC 3339 string with the local UTC offset."""
    return str(strict_rfc3339.now_to_rfc3339_localoffset())
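The str() wrapper above is redundant, since now_to_rfc3339_localoffset() already returns a string. A quick sanity check using the library's own validator (validate_rfc3339 is part of strict_rfc3339's public API):

import strict_rfc3339

s = now()
assert isinstance(s, str)                  # str() above was a no-op
assert strict_rfc3339.validate_rfc3339(s)  # well-formed RFC 3339 string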