def test_now(self):
    s = strict_rfc3339.now_to_rfc3339_localoffset()
    w = strict_rfc3339.rfc3339_to_timestamp(s)
    # Offset check assumes the local zone is US Eastern (EST/EDT).
    assert s[-6:] == ["-05:00", "-04:00"][time.localtime(w).tm_isdst]
    d = int(time.time()) - w
    assert d == 0 or d == 1
    # Passing False asks for sub-second precision instead of integer seconds.
    s = strict_rfc3339.now_to_rfc3339_localoffset(False)
    assert abs(strict_rfc3339.rfc3339_to_timestamp(s) - time.time()) <= 0.1
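The test above hard-codes US Eastern offsets (-05:00/-04:00), so it only passes in that time zone. A zone-independent sketch of the same round trip, using only the strict_rfc3339 functions already exercised in this snippet:

import time

import strict_rfc3339

# Format "now" with the local offset, parse it back, and check the result
# lands within a second of the clock reading (integer-second precision).
s = strict_rfc3339.now_to_rfc3339_localoffset()
w = strict_rfc3339.rfc3339_to_timestamp(s)
assert abs(int(time.time()) - w) <= 1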
def process(self, element):
    logging.info('CountAverages start: %s %r' % (type(element), element))
    stat_names = ["temperature", "pressure", "humidity", "windgen", "solargen"]
    avg_e = {}
    aggr = {}
    for k in stat_names:
        aggr[k] = (0, 0)
    avg_e['clientid'] = element[0]
    avg_e['timestamp'] = strict_rfc3339.now_to_rfc3339_localoffset()
    # Accumulate sum and count for each metric
    for elem_map in element[1]:
        for key in stat_names:
            if key in elem_map:
                value = elem_map[key]
                aggr[key] = (aggr[key][0] + value, aggr[key][1] + 1)
    # Calculate average and set in return map
    for key, value in aggr.items():  # iteritems() is Python 2 only
        if value[1] == 0:
            avg_e[key] = 0
        else:
            avg_e[key] = value[0] / value[1]
    logging.info('CountAverages end: {}'.format(avg_e))
    return [avg_e]
def process(self, element):
    stat_names = ["I", "U", "Tm"]
    avg_e = {}
    aggr = {}
    for k in stat_names:
        aggr[k] = (0, 0)
    avg_e['deviceid'] = element[0]
    avg_e['timestamp'] = strict_rfc3339.now_to_rfc3339_localoffset()
    # Accumulate sum and count for each metric
    for elem_map in element[1]:
        for key in stat_names:
            if key in elem_map:
                value = elem_map[key]
                aggr[key] = (aggr[key][0] + value, aggr[key][1] + 1)
    # Calculate average and set in return map
    for key, value in aggr.items():
        if value[1] == 0:
            avg_e[key] = 0
        else:
            avg_e[key] = value[0] / value[1]
    logging.info('Engine_avr: {}'.format(avg_e))
    return [avg_e]
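Both process() methods above expect a grouped element of the form (key, iterable of per-reading dicts), as produced by a group-by-key upstream, and return a one-element list of averages. A small, hypothetical illustration of that contract; EngineAverages is a stand-in name for whichever DoFn class the second method belongs to:

# Hypothetical input element: (deviceid, per-reading dicts).
element = ("engine-7", [
    {"I": 10.0, "U": 230.0},
    {"I": 14.0, "U": 228.0, "Tm": 85.5},
])

# Calling the method by hand:
# [avg_e] = EngineAverages().process(element)
# avg_e -> {'deviceid': 'engine-7', 'timestamp': '<RFC 3339 local time>',
#           'I': 12.0, 'U': 229.0, 'Tm': 85.5}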
for dataset in data_all:
    ## Skip big lottery for testing
    #if dataset['identifier'] == 'a002400000Z58cqAAB':
    #    continue
    metadata = dataset.get('datagetter_metadata', {})
    dataset['datagetter_metadata'] = metadata
    if dataset['license'] not in acceptable_licenses + unacceptable_licenses:
        raise ValueError('Unrecognised license ' + dataset['license'])
    url = dataset['distribution'][0]['downloadURL']
    if args.download:
        metadata['datetime_downloaded'] = strict_rfc3339.now_to_rfc3339_localoffset()
        try:
            r = requests.get(
                url,
                headers={'User-Agent': 'datagetter (https://github.com/ThreeSixtyGiving/datagetter)'})
            r.raise_for_status()
        except Exception:
            print("\n\nDownload failed for dataset {}\n".format(dataset['identifier']))
            traceback.print_exc()
            exit_status = 1
            metadata['downloads'] = False
        else:
def fetch_and_convert(args, dataset):
    r = None
    metadata = dataset.get('datagetter_metadata', {})
    dataset['datagetter_metadata'] = metadata
    if dataset['license'] not in acceptable_licenses + unacceptable_licenses:
        raise ValueError('Unrecognised license ' + dataset['license'])
    url = dataset['distribution'][0]['downloadURL']

    if args.download:
        proxies = None
        metadata['datetime_downloaded'] = strict_rfc3339.now_to_rfc3339_localoffset()
        if args.socks5_proxy:
            proxies = {
                'http': args.socks5_proxy,
                'https': args.socks5_proxy,
            }
        try:
            print("Fetching %s" % url)
            r = requests.get(
                url,
                headers={'User-Agent': 'datagetter (https://github.com/ThreeSixtyGiving/datagetter)'},
                proxies=proxies)
            r.raise_for_status()
            metadata['downloads'] = True
        except Exception as e:
            if isinstance(e, KeyboardInterrupt):
                raise
            print("\n\nDownload {} failed for dataset {}\n".format(url, dataset['identifier']))
            traceback.print_exc()
            metadata['downloads'] = False
            metadata['error'] = str(e)
            # For HTTP errors we carry on, so the response body can still be inspected.
            if not isinstance(e, requests.exceptions.HTTPError):
                return

        content_type = r.headers.get('content-type', '').split(';')[0].lower()
        if content_type and content_type in CONTENT_TYPE_MAP:
            file_type = CONTENT_TYPE_MAP[content_type]
        elif 'content-disposition' in r.headers:
            file_type = rfc6266.parse_requests_response(r).filename_unsafe.split('.')[-1]
        else:
            file_type = url.split('.')[-1]
        if file_type not in CONTENT_TYPE_MAP.values():
            print("\n\nUnrecognised file type {}\n".format(file_type))
            return

        # Check that the downloaded json file is valid json and not junk from the
        # webserver, e.g. a 500 error being output without the proper status code.
        if file_type == "json":
            try:
                json.loads(r.text)
            except ValueError:
                print("\n\nJSON file provided by webserver is invalid")
                metadata['downloads'] = False
                metadata['error'] = "Invalid JSON file provided by webserver"
                return

        metadata['file_type'] = file_type
        file_name = args.data_dir + '/original/' + dataset['identifier'] + '.' + file_type
        with open(file_name, 'wb') as fp:
            fp.write(r.content)
    else:  # --no-download arg
        # We require the metadata to exist; it won't if the file failed to download correctly
        if metadata['downloads'] == False:
            print("Skipping %s as it was not marked as successfully downloaded"
                  % dataset['identifier'])
            return
        file_type = metadata['file_type']
        file_name = args.data_dir + '/original/' + dataset['identifier'] + '.' + file_type

    json_file_name = '{}/json_all/{}.json'.format(args.data_dir, dataset['identifier'])
    metadata['file_size'] = os.path.getsize(file_name)

    if args.convert and (args.convert_big_files or metadata['file_size'] < 10 * 1024 * 1024):
        if file_type == 'json':
            os.link(file_name, json_file_name)
            metadata['json'] = json_file_name
        else:
            try:
                print("Running convert on %s to %s" % (file_name, json_file_name))
                convert_spreadsheet(file_name, json_file_name, file_type)
            except KeyboardInterrupt:
                raise
            except Exception:
                print("\n\nUnflattening failed for file {}\n".format(file_name))
                traceback.print_exc()
                metadata['json'] = None
                metadata["valid"] = False
                metadata["error"] = "Could not unflatten file"
            else:
                metadata['json'] = json_file_name

    metadata['acceptable_license'] = dataset['license'] in acceptable_licenses

    # We can only do anything with the JSON if it did successfully convert.
if metadata.get('json'):
    format_checker = FormatChecker()
    if args.validate:
        try:
            with open(json_file_name, 'r') as fp:
                validate(json.load(fp), schema, format_checker=format_checker)
        except (ValidationError, ValueError):
            metadata['valid'] = False
        else:
            metadata['valid'] = True

    if metadata['valid']:
        os.link(json_file_name,
                '{}/json_valid/{}.json'.format(args.data_dir, dataset['identifier']))
        data_valid.append(dataset)
        if metadata['acceptable_license']:
            os.link(json_file_name,
                    '{}/json_acceptable_license_valid/{}.json'.format(
                        args.data_dir, dataset['identifier']))
            data_acceptable_license_valid.append(dataset)

    if metadata['acceptable_license']:
        os.link(json_file_name,
                '{}/json_acceptable_license/{}.json'.format(
                    args.data_dir, dataset['identifier']))
        data_acceptable_license.append(dataset)
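fetch_and_convert() leans on a module-level CONTENT_TYPE_MAP that is not shown here. Judging only from how the snippet uses it (MIME-type keys, file-extension values that are also matched against URL suffixes), a plausible minimal sketch would be the following; the real table lives in the datagetter project and may differ:

# Hypothetical sketch of the assumed mapping, not the project's actual table.
CONTENT_TYPE_MAP = {
    'application/json': 'json',
    'text/csv': 'csv',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
}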
def set_up_db_reading(report_label):
    """Gets values for the db connection, makes the connection, and specifies output filenames.

    Args:
        report_label (str): A label used to name the output files (logs and data).

    Returns:
        dict: A dict of various values for the script to make db_connection, name files, etc.

    """
    # Parse command line inputs.
    parser = argparse.ArgumentParser(description='inputs')
    parser.add_argument('-v', '--verbose', action='store_true', help='DEBUG-level logging.', required=False)
    parser.add_argument('-a', '--alliance', action='store_true', help='Filenames for AGR export.', required=False)
    parser.add_argument('-c', '--config_file', help='Supply filepath to credentials, optional.', required=False)
    parser.add_argument('-t', '--testing', action='store_true', help='Rollback db writes.', required=False)

    # Use parse_known_args() instead of parse_args() to handle only the args relevant here without crashing.
    # Extra arguments that may be relevant to specific scripts using this module are safely ignored.
    # args = parser.parse_args()
    args, extra_args = parser.parse_known_args()

    # Determine whether the script is to run locally or in docker.
    config_file = args.config_file

    # Determine values for key variables.
    if config_file:
        config = configparser.ConfigParser()
        config.read(config_file)
        server = config['default']['Server']
        database = config['default']['Database']
        username = config['default']['User']
        password = config['default']['PGPassword']
        database_release = config['default']['Release']
        assembly = config['default']['Assembly']
        annotation_release = config['default']['AnnotationRelease']
        alliance_schema = config['default']['AllianceSchema']
        alliance_release = config['default']['AllianceRelease']
        svn_username = config['default']['SVNUsername']
        svn_password = config['default']['SVNPassword']
        output_dir = './'
        input_dir = './'
        log_dir = './'
    else:
        server = os.environ['SERVER']
        database = os.environ['DATABASE']
        username = os.environ['USER']
        password = os.environ['PGPASSWORD']
        database_release = os.environ['RELEASE']
        assembly = os.environ['ASSEMBLY']
        annotation_release = os.environ['ANNOTATIONRELEASE']
        alliance_schema = os.environ['ALLIANCESCHEMA']
        alliance_release = os.environ['ALLIANCERELEASE']
        svn_username = os.environ['SVNUSER']
        svn_password = os.environ['SVNPASSWORD']
        output_dir = '/src/output/'
        input_dir = '/src/input/'
        log_dir = '/src/logs/'

    # Send values to a dict.
    set_up_dict = {}
    set_up_dict['server'] = server
    set_up_dict['database'] = database
    set_up_dict['username'] = username
    set_up_dict['password'] = password
    set_up_dict['database_release'] = database_release
    set_up_dict['assembly'] = assembly
    set_up_dict['annotation_release'] = annotation_release
    set_up_dict['alliance_schema'] = alliance_schema
    set_up_dict['alliance_release'] = alliance_release
    set_up_dict['svn_username'] = svn_username
    set_up_dict['svn_password'] = svn_password
    set_up_dict['input_dir'] = input_dir
    set_up_dict['output_dir'] = output_dir

    # Determine if the testing variable is True or False.
    set_up_dict['testing'] = args.testing

    # Output filename.
    alliance = args.alliance
    if alliance is True:
        set_up_dict['output_filename'] = output_dir + 'FB_' + alliance_schema + '_' + report_label + '.json'
    else:
        set_up_dict['output_filename'] = output_dir + report_label + '_' + database + '.tsv'

    # Handle logging.
    if alliance is True:
        log_filename = output_dir + 'FB_' + alliance_schema + '_' + report_label + '.log'
    else:
        log_filename = log_dir + report_label + '_' + database + '.log'
    file_handler = logging.FileHandler(log_filename, mode='a')
    formatter = logging.Formatter('%(asctime)s : %(levelname)s : Line No %(lineno)d : %(message)s')
    file_handler.setFormatter(formatter)
    log.addHandler(file_handler)

    # Determine log level.
    verbose = args.verbose
    if verbose is True:
        log.setLevel(logging.DEBUG)
    else:
        log.setLevel(logging.INFO)
    sys.stdout = open(log_filename, 'a')
    set_up_dict['log'] = logging.getLogger(__name__)

    # Establish database connection.
    set_up_dict['conn'], conn_description = establish_db_connection(server, database, username, password)

    # Official timestamp for this script.
    set_up_dict['the_time'] = strict_rfc3339.now_to_rfc3339_localoffset()

    log.info('Done setting up the environment, db connections and logging.')
    log.info(conn_description)
    if extra_args:
        log.info('These extra arguments were not used by set_up_db_reading(): {}'.format(extra_args))

    return set_up_dict
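When -c/--config_file is supplied, set_up_db_reading() reads an INI file with a [default] section whose key names appear verbatim in the config lookups above. A skeleton with placeholder values (all values here are invented for illustration):

[default]
Server = localhost
Database = production_db
User = db_user
PGPassword = db_password
Release = 2023_05
Assembly = R6
AnnotationRelease = 104
AllianceSchema = 1.0.1
AllianceRelease = 5.3.0
SVNUsername = svn_user
SVNPassword = svn_password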
def now():
    """Return the current time as an RFC 3339 string with the local UTC offset."""
    return str(strict_rfc3339.now_to_rfc3339_localoffset())
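now_to_rfc3339_localoffset() already returns a str, so the str() wrapper above is a defensive no-op. A quick sanity check using strict_rfc3339's validator, assuming the now() helper above is in scope:

import strict_rfc3339

s = now()  # e.g. '2021-06-01T12:34:56-04:00' (offset depends on the local zone)
assert strict_rfc3339.validate_rfc3339(s)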