def main(args):
    path_config, subset_selection_config, simple_training_config, advanced_training_config = \
        utils.parse_config_file(args.config_file_name)

    utils.init_gpu(memory_limit=args.gpu_memory_limit)

    (train_images, train_labels), (test_images, test_labels) = load_subset_database(
        args, path_config, subset_selection_config)

    assert len(train_images) == len(train_labels)
    assert len(test_images) == len(test_labels)

    logging.info(f'Number of train samples: {len(train_images)}')
    logging.info(f'Number of test samples: {len(test_images)}')

    if args.scenario == 'experimental':
        train_experimental_model(train_images, train_labels, test_images, test_labels)
    elif args.scenario == 'simple':
        train_basic_model(args, path_config, simple_training_config,
                          train_images, train_labels, test_images, test_labels)
    elif args.scenario == 'advanced':
        train_advanced_model(args, path_config, advanced_training_config,
                             train_images, train_labels, test_images, test_labels)
    else:
        assert False, f'Unhandled scenario configuration: {args.scenario}'
def parse_config_file(self):
    """
    This function calls out to the configuration parser, which contains
    various constants to run this pipeline
    """
    target_section = 'DEMUX'
    logging.info('Looking to parse the [%s] section from a configuration file.' % target_section)
    self.config_params_dict = utils.parse_config_file(target_section)
    logging.info('Parameters parsed from configuration file: ')
    logging.info(self.config_params_dict)
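Several of the examples here call utils.parse_config_file with an INI section name ('DEMUX', 'TRACKING', 'CLOUD') and treat the result as a plain dict of parameters, while others pass a file path instead. The helper itself is not part of these excerpts; a minimal sketch of the section-name flavor, assuming a standard INI-style file at a hypothetical config.cfg path and comma-separated values for list-valued keys, could look like this:

import configparser
import os

def parse_config_file(target_section, config_path=None):
    """Return one section of an INI-style config file as a plain dict.

    Illustrative sketch only: the real utils.parse_config_file used by the
    examples is not shown, and the 'config.cfg' default path is an assumption.
    """
    if config_path is None:
        config_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), 'config.cfg')
    parser = configparser.ConfigParser()
    parser.read(config_path)
    params = {}
    for key, value in parser[target_section].items():
        # treat comma-separated values as lists (e.g. reminder_intervals = 14,7,3)
        params[key] = [v.strip() for v in value.split(',')] if ',' in value else value
    return params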
def setUp(self):
    # a basic dictionary to hold the config parameters that don't change
    # recall that we're mocking out the configuration file parsing process
    """
    self.static_params = {}
    self.static_params['retention_period'] = '30'
    self.static_params['reminder_intervals'] = ['14','7','3']
    self.static_params['date_format'] = '%m/%d/%Y'
    """
    self.static_params = utils.parse_config_file('TRACKING')

    self.test_input_files = []

    # an empty file
    f2 = os.path.join(this_dir, 'test_db_file2.db')
    self.test_input_files.append(f2)
    with open(f2, 'w') as fout:
        fout.write('\n')

    # malformatted date
    f3 = os.path.join(this_dir, 'test_db_file3.db')
    self.test_input_files.append(f3)
    with open(f3, 'w') as fout:
        fout.write('\t'.join([
            'abc', 'bucket-abc', '[email protected],[email protected]', '3/13/17'
        ]))

    # mismatching bucket names for the same project
    f4 = os.path.join(this_dir, 'test_db_file4.db')
    self.test_input_files.append(f4)
    with open(f4, 'w') as fout:
        fout.write('\t'.join([
            'abc', 'bucket-abc', '[email protected],[email protected]', '03/13/2017'
        ]))

    # a 'good' existing db
    f5 = os.path.join(this_dir, 'test_db_file5.db')
    self.test_input_files.append(f5)
    with open(f5, 'w') as fout:
        fout.write('\t'.join([
            'abc', 'bucket-abc', '[email protected],[email protected]', '03/13/2017'
        ]))

    # a 'good' existing db
    f6 = os.path.join(this_dir, 'test_db_file6.db')
    self.test_input_files.append(f6)
    with open(f6, 'w') as fout:
        fout.write('\t'.join([
            'abc', 'bucket-abc', '[email protected],[email protected]', '03/13/2017'
        ]))
def main():
    # load some config parameters
    params = utils.parse_config_file('TRACKING')

    # look in the various places to determine that all database files are consistent
    db_collection = parse_all_db_files(params)

    # get the database contents- see function for details
    db_contents = ensure_consistency(db_collection)

    # scan the contents, perform the appropriate actions
    scan_db(db_contents, params)
def main():
    now = time.time()
    config = utils.parse_config_file("config/config.yaml")

    response = requests.get(URL, params=dict(
        APPID=config['openweathermap_credentials']['api_key'],
        q="Dublin,IE"
    ))
    new_data = {obj['dt']: obj for obj in response.json()['list']}

    s3 = boto3.client('s3', **config['aws_credentials'])
    tmp_data = json.loads(s3.get_object(Bucket='ww-scraper', Key=FILE_KEY)['Body'].read())

    current_data, deleted_dts = delete_old_dts(tmp_data, now)
    new_dts, updated_dts = merge_new_data_into_current_data(current_data, new_data, now)

    print("New dts: %s" % (new_dts,))
    print("Deleted dts: %s" % (deleted_dts,))
    print("Updated dts: %s" % (updated_dts,))

    s3.put_object(Bucket='ww-scraper', Key=FILE_KEY,
                  Body=json.dumps(current_data, indent=4))
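delete_old_dts and merge_new_data_into_current_data are not defined in this excerpt. Judging only from how they are called and what they return, one plausible shape for them (purely an assumption, with a hypothetical MAX_AGE_SECONDS cutoff) is:

MAX_AGE_SECONDS = 5 * 24 * 3600  # hypothetical retention window for forecast timestamps

def delete_old_dts(data, now):
    """Drop entries whose 'dt' timestamp is too old relative to now.

    Returns the surviving data plus the list of deleted timestamps.
    Sketch only; the real helpers are not shown in the example above.
    """
    deleted = [dt for dt in data if now - float(dt) > MAX_AGE_SECONDS]
    current = {dt: obj for dt, obj in data.items() if dt not in deleted}
    return current, deleted

def merge_new_data_into_current_data(current_data, new_data, now):
    """Merge freshly fetched forecasts into current_data in place.

    Returns the timestamps that were added and the ones that were overwritten.
    """
    new_dts = [dt for dt in new_data if dt not in current_data]
    updated_dts = [dt for dt in new_data if dt in current_data]
    current_data.update(new_data)
    return new_dts, updated_dts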
def parse_args():
    """Read and parse options from command line
    Initialize logging

    Return values:
        options: options and arguments parsed from command line
        confs: configuration file dictionary information
    """
    usage = """usage: %prog [options]
To crawl pages from the web according to specific URL patterns."""
    parser = optparse.OptionParser(usage=usage, version='%prog 1.0.0.0')
    parser.add_option('-c', dest='filename',
                      help='read config file', metavar='FILE')
    parser.add_option('-l', '--log', dest='log', action='store_true',
                      help='start logging', default=False)
    options, args = parser.parse_args()

    if options.filename is None:
        print(parser.format_help())
        parser.exit()
    else:
        confs = utils.parse_config_file(options.filename)

    if options.log:
        log.read_config_file(confs['log']['log_config_file'])
        log.install(confs['log']['log_name'])
    else:
        log.uninstall()
    return options, confs
def main(project_mapping):
    """
    project_mapping is a two-level nested dict.  The first level's keys are the iLab project IDs
    and each one maps to a dict.  Each 'second level' dict has a bucket and client_emails key,
    which give the bucket name gs://<bucket name> and a list of emails, respectively
    """
    logging.info('In cloud tracking module')
    logging.info('Project mapping: %s' % project_mapping)

    # get some configuration parameters
    params = utils.parse_config_file('TRACKING')

    # need to cleanup some of the parameters:
    try:
        params['retention_period'] = int(params['retention_period'])
        logging.info('Params read from config: %s' % params)
        logging.info('Retention period set to %s days' % params['retention_period'])
    except:
        logging.error('Could not interpret one of the configuration parameters correctly.  Check that the intended data types match those in the config file')
        sys.exit(1)

    # set the expiration date
    target_date = datetime.datetime.now() + datetime.timedelta(days=params['retention_period'])

    # read the database file
    this_dir = os.path.dirname(os.path.realpath(__file__))
    params['data_retention_db'] = os.path.join(this_dir, params['data_retention_db'])
    if os.path.isfile(params['data_retention_db']):
        logging.info('About to parse database file')
        project_database = utils.load_database(params['data_retention_db'], params)
        logging.info('Parsed from the database: %s' % project_database)
    else:
        logging.error('Could not find a database file at %s' % params['data_retention_db'])
        raise MissingPrimaryDatabaseException('The primary database file is missing.  Fix that.')

    for project_id, info_dict in project_mapping.items():
        logging.info('Checking project with iLab ID: %s' % project_id)

        # perhaps we have an ongoing project- then a bucket for this iLab ID probably already exists
        if project_id in project_database:
            logging.info('project with ID %s was already in our database.  Plan to update the deletion date' % project_id)

            # get the info we have about this in our database
            db_entry = project_database[project_id]

            # ensure the bucket names match.  If they do, simply update the retention target date and the email contacts
            if info_dict['bucket'] == db_entry['bucket']:
                logging.info('The delivery buckets matched, as expected')
                logging.info('Changing deletion date from %s to %s' % (
                    db_entry['target_date'].strftime(params['date_format']),
                    target_date.strftime(params['date_format'])))
                db_entry['target_date'] = target_date
                existing_emails = set(db_entry['client_emails'])
                new_emails = set(info_dict['client_emails'])
                total_emails = existing_emails.union(new_emails)
                logging.info('Original emails were: %s' % existing_emails)
                logging.info('New emails were: %s' % new_emails)
                logging.info('The union of those sets of emails is %s' % total_emails)
                db_entry['client_emails'] = list(total_emails)
            else:
                # somehow the same iLab project was placed into a different bucket.  Shouldn't happen, so raise an exception.
                # We retain a 1-to-1 mapping between iLab IDs and buckets.  Maybe later we change this behavior based on a particular use-case
                logging.error('The bucket name did not match that of a prior project with the same iLab ID.  This should not happen.')
                logging.error('The bucket found in the database was: %s' % db_entry['bucket'])
                logging.error('The bucket that the demux was just uploaded to was: %s' % info_dict['bucket'])
                raise MultipleBucketsForSameProjectException('The iLab IDs were the same, but the bucket was somehow different.  Someone needs to check this!')
                #TODO- send a message for someone to fix it.
        else:
            logging.info('A new project will be added to the database.')
            logging.info('update info dict.  Before %s, then add %s' % (info_dict, target_date))
            info_dict.update({'target_date': target_date})
            project_database[project_id] = info_dict
            logging.info('Project database: %s' % project_database)

    # now write to the database file:
    utils.write_to_db(project_database, params)
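For reference, the project_mapping structure described in the docstring above looks roughly like the following; the project ID, bucket name, and email placeholders are illustrative, not values taken from the source:

# Illustrative only: shape of the two-level project_mapping dict expected by main()
project_mapping = {
    'ilab-12345': {
        'bucket': 'delivery-bucket-ilab-12345',        # hypothetical gs:// bucket name
        'client_emails': ['[email protected]', '[email protected]'],
    },
}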
def convert(config_path, weights_path, output_path,
            remove_batch_normalization=False, save_keras=True,
            save_tfjs=False, plot_model=False):
    model_name = os.path.splitext(config_path)[0]

    print('Parsing darknet config...')
    unique_config_file = parse_config_file(config_path)
    cfg_parser = configparser.ConfigParser()
    cfg_parser.read_file(unique_config_file)

    print('Loading weights...')
    weights_file = open(weights_path, 'rb')
    major, minor, revision = np.ndarray(shape=(3, ),
                                        dtype='int32',
                                        buffer=weights_file.read(12))
    if (major * 10 + minor) >= 2 and major < 1000 and minor < 1000:
        seen = np.ndarray(shape=(1, ),
                          dtype='int64',
                          buffer=weights_file.read(8))
    else:
        seen = np.ndarray(shape=(1, ),
                          dtype='int32',
                          buffer=weights_file.read(4))
    print('Weights Header: ', major, minor, revision, seen)

    input_layer = Input(shape=(None, None, 3))
    prev_layer = input_layer
    all_layers = []

    weight_decay = float(cfg_parser['net_0']['decay']
                         ) if 'net_0' in cfg_parser.sections() else 5e-4
    weight_count = 0  # running count of the weights read from the file
    out_index = []
    for section in cfg_parser.sections():
        print('Parsing section {}'.format(section))
        if section.startswith('convolutional'):
            filters = int(cfg_parser[section]['filters'])
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            pad = int(cfg_parser[section]['pad'])
            activation = cfg_parser[section]['activation']
            batch_normalize = 'batch_normalize' in cfg_parser[section]

            padding = 'same' if pad == 1 and stride == 1 else 'valid'

            # Setting weights.
            # Darknet serializes convolutional weights as:
            # [bias/beta, [gamma, mean, variance], conv_weights]
            prev_layer_shape = K.int_shape(prev_layer)

            weights_shape = (size, size, prev_layer_shape[-1], filters)
            darknet_w_shape = (filters, weights_shape[2], size, size)
            weights_size = np.product(weights_shape)

            conv_bias = np.ndarray(shape=(filters, ),
                                   dtype='float32',
                                   buffer=weights_file.read(filters * 4))
            weight_count += filters

            if batch_normalize:
                bn_weights = np.ndarray(shape=(3, filters),
                                        dtype='float32',
                                        buffer=weights_file.read(filters * 12))
                weight_count += 3 * filters

                bn_weight_list = [
                    bn_weights[0],  # scale gamma
                    conv_bias,      # shift beta
                    bn_weights[1],  # running mean
                    bn_weights[2]   # running var
                ]

            conv_weights = np.ndarray(shape=darknet_w_shape,
                                      dtype='float32',
                                      buffer=weights_file.read(weights_size * 4))
            weight_count += weights_size

            # Darknet conv_weights are serialized Caffe-style:
            # (out_dim, in_dim, height, width)
            # we would like to set these to Tensorflow order:
            # (height, width, in_dim, out_dim)
            conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])

            if remove_batch_normalization:
                if batch_normalize:
                    conv_layer_weights = fold_batch_norm_layer(
                        conv_weights, bn_weight_list)
                else:
                    conv_layer_weights = [conv_weights, conv_bias]
                use_bias = True
            else:
                if batch_normalize:
                    conv_layer_weights = [conv_weights]
                else:
                    conv_layer_weights = [conv_weights, conv_bias]
                use_bias = not batch_normalize

            # Create Conv2D layer
            if stride > 1:
                # Darknet uses left and top padding instead of 'same' mode
                prev_layer = ZeroPadding2D(((1, 0), (1, 0)))(prev_layer)

            # Handle activation.
            act_fn = None
            if activation == 'leaky':
                pass  # Add advanced activation later.
            elif activation != 'linear':
                raise ValueError(
                    'Unknown activation function `{}` in section {}'.format(
                        activation, section))

            conv_layer = (Conv2D(filters, (size, size),
                                 strides=(stride, stride),
                                 kernel_regularizer=l2(weight_decay),
                                 use_bias=use_bias,
                                 weights=conv_layer_weights,
                                 activation=act_fn,
                                 padding=padding))(prev_layer)

            if not remove_batch_normalization and batch_normalize:
                conv_layer = (BatchNormalization(
                    weights=bn_weight_list))(conv_layer)

            prev_layer = conv_layer

            if activation == 'leaky':
                prev_layer = LeakyReLU(alpha=0.1)(prev_layer)
            all_layers.append(prev_layer)

        elif section.startswith('route'):
            ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
            layers = [all_layers[i] for i in ids]
            if len(layers) > 1:
                concatenate_layer = Concatenate()(layers)
                all_layers.append(concatenate_layer)
                prev_layer = concatenate_layer
            else:
                skip_layer = layers[0]  # only one layer to route
                all_layers.append(skip_layer)
                prev_layer = skip_layer

        elif section.startswith('maxpool'):
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            all_layers.append(
                MaxPooling2D(pool_size=(size, size),
                             strides=(stride, stride),
                             padding='same')(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('shortcut'):
            index = int(cfg_parser[section]['from'])
            activation = cfg_parser[section]['activation']
            assert activation == 'linear', 'Only linear activation supported.'
            all_layers.append(Add()([all_layers[index], prev_layer]))
            prev_layer = all_layers[-1]

        elif section.startswith('reorg'):
            block_size = int(cfg_parser[section]['stride'])
            assert block_size == 2, 'Only reorg with stride 2 supported.'
            all_layers.append(
                Lambda(reorg, output_shape=reorg_shape,
                       name=str(section))(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('upsample'):
            stride = int(cfg_parser[section]['stride'])
            assert stride == 2, 'Only stride=2 supported.'
            all_layers.append(UpSampling2D(stride)(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('softmax'):
            all_layers.append(Activation("softmax")(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('avgpool'):
            all_layers.append(GlobalAveragePooling2D()(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('yolo') or section.startswith('region'):
            out_index.append(len(all_layers) - 1)
            all_layers.append(None)
            prev_layer = all_layers[-1]
            anchors = cfg_parser[section]['anchors']

        elif section.startswith('net'):
            pass

        else:
            raise ValueError(
                'Unsupported section header type: {}'.format(section))

    # Check to see if all weights have been read.
    remaining_weights = len(weights_file.read()) / 4
    weights_file.close()
    print('Read {} of {} from Darknet weights.'.format(
        weight_count, weight_count + remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))

    # Create and save model.
    if len(out_index) == 0:
        out_index.append(len(all_layers) - 1)

    input_l = input_layer
    out_l = [all_layers[i] for i in out_index]

    model = Model(inputs=input_l, outputs=out_l)
    print(model.summary())

    if remove_batch_normalization:
        model_save_path = '{}/{}_nobn.h5'.format(output_path, model_name)
        model_summary_path = '{}/{}_nobn_summary.txt'.format(output_path, model_name)
        tfjs_path = '{}/{}-tfjs_nobn/'.format(output_path, model_name)
        plot_path = '{}/{}_nobn.png'.format(output_path, model_name)
        anchors_path = '{}/{}_nobn_anchors.txt'.format(output_path, model_name)
    else:
        model_save_path = '{}/{}.h5'.format(output_path, model_name)
        model_summary_path = '{}/{}_summary.txt'.format(output_path, model_name)
        tfjs_path = '{}/{}-tfjs/'.format(output_path, model_name)
        plot_path = '{}/{}.png'.format(output_path, model_name)
        anchors_path = '{}/{}_anchors.txt'.format(output_path, model_name)

    with open(anchors_path, 'w') as f:
        print(anchors, file=f)
        print('Saved anchors to {}'.format(anchors_path))

    with open(model_summary_path, 'w') as f:
        model.summary(print_fn=lambda x: f.write(x + '\n'))
        print('Saved model summary to {}'.format(model_summary_path))

    if save_keras:
        model.save(model_save_path)
        print('Saved Keras model to {}'.format(model_save_path))

    if save_tfjs:
        import tensorflowjs as tfjs
        # tfjs.converters.save_keras_model(model, tfjs_path, quantization_dtype=np.uint8)
        tfjs.converters.save_keras_model(model, tfjs_path)
        print('Saved Tensorflowjs model to {}'.format(tfjs_path))

    if plot_model:
        from keras.utils.vis_utils import plot_model as plot
        plot(model, to_file=plot_path, show_shapes=True)
        print('Saved image plot to {}'.format(plot_path))
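fold_batch_norm_layer is called above but not shown. The usual way to fold a BatchNormalization layer into the preceding convolution is to rescale the kernel by gamma / sqrt(var + eps) and fold the mean and beta into a new bias; a sketch consistent with the [kernel, bias] weight list expected by Conv2D above (the eps value is an assumption) is:

import numpy as np

def fold_batch_norm_layer(conv_weights, bn_weight_list, eps=1e-5):
    """Fold batch-norm parameters into the convolution kernel and bias.

    conv_weights: kernel in TensorFlow order (height, width, in_dim, out_dim).
    bn_weight_list: [gamma, beta, running_mean, running_var], one value per filter,
    matching the ordering built in the example above.
    Sketch only: the eps default is an assumption, not a verified implementation.
    """
    gamma, beta, mean, var = bn_weight_list
    scale = gamma / np.sqrt(var + eps)      # per-filter rescaling factor
    folded_kernel = conv_weights * scale    # broadcasts over the last (out_dim) axis
    folded_bias = beta - mean * scale       # BN shift folded into a conv bias
    return [folded_kernel, folded_bias]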
print("[Evaluator] Initializing at %d training steps:" % crt_training_step) agent = eval_agent eval_env.get_crt_step(crt_training_step) agent.policy_evaluation.policy.load_state_dict(policy.state_dict()) preprocess = Preprocessor(cmdl.env_class).transform step_cnt = 0 o, r, done = eval_env.reset(), 0, False while step_cnt < cmdl.evaluator.eval_steps: s = preprocess(o) a = agent.evaluate_policy(s) o, r, done, _ = eval_env.step(a) step_cnt += 1 if done: o, r, done = eval_env.reset(), 0, False if __name__ == "__main__": # Parse cmdl args for the config file and return config as Namespace config = utils.parse_config_file(utils.parse_cmd_args()) # Assuming everything in the config is deterministic already. torch.manual_seed(config.seed) numpy.random.seed(config.seed) # Let's do this! train_agent(config)
print("--------- Training ----------") print("N-step : %d" % config.agent.n_horizon) print("Optim Freq : %d" % config.agent.update_freq) print("Batch : %d" % config.agent.batch_size) print("Fast Lr : %.6f" % config.agent.fast_lr) print("Slow Lr : %.6f" % config.agent.slow_lr) print("-----------------------------") print("stp, nst, act | return") print("-----------------------------") def display_stats(ep_cnt, step_cnt, elapsed_time): fps = step_cnt / elapsed_time print( clr( "[ %s ] finished after %d eps, %d steps." % ("Main", ep_cnt, step_cnt), 'white', 'on_magenta')) print( clr( "[ %s ] finished after %.2fs, %.2ffps." % ("Main", elapsed_time, fps), 'white', 'on_magenta')) if __name__ == "__main__": cmd_args = utils.parse_cmd_args() config = utils.parse_config_file(cmd_args) torch.manual_seed(config.seed) numpy.random.seed(config.seed) train_agent(config)
def entry_method(project_dir, params):
    """
    project_dir is a string giving the full path to the project directory
    params is a dictionary of configuration parameters.
    """

    # update the param dict to include the cloud-specific parameters
    cloud_specific_params = utils.parse_config_file('CLOUD')
    params.update(cloud_specific_params)
    logging.info('Updated params in cloud upload script: %s' % params)

    # correct the fastQC directory suffix name
    params['fastqc_dir_tag'] = params['final_fastq_tag'] + params['fastqc_dir_suffix']
    params['fastq_file_suffix'] = '.' + params['final_fastq_tag'] + '.fastq.gz'

    client_email_addresses = get_client_emails(project_dir, params)

    ilab_id = os.path.basename(project_dir).replace('_', '-').lower()

    driver = get_connection_driver(params)
    bucket_obj = get_or_create_bucket(ilab_id, driver, client_email_addresses, params)

    #TODO: collect lane-specific fastq
    fastq_files = collect_files(project_dir, 'fastq_file_suffix', params)
    logging.info('Found the following fastq files:')
    logging.info('\n'.join(fastq_files))

    fastQC_dirs = collect_files(project_dir, 'fastqc_dir_tag', params)
    logging.info('Found the following fastQC report directories:')
    logging.info('\n'.join(fastQC_dirs))

    #for fqd in fastQC_dirs:
    #    prep_fastqc_files(fqd, bucket_obj.name, params)

    # zip-up fastQC directories
    zipfile = zip_fastqc_reports(fastQC_dirs, project_dir, params)

    # do uploads
    uploaded_objects = []
    uploaded_objects.extend(
        upload_fastq_dir(fastq_files, project_dir, bucket_obj,
                         params['cloud_fastq_root'], params))
    #uploaded_objects.extend(upload(fastq_files, bucket_obj, params['cloud_fastq_root'], params))
    uploaded_objects.extend(
        upload([zipfile, ], bucket_obj, params['cloud_fastqc_root'], params))

    # give permissions:
    give_permissions(driver, bucket_obj, uploaded_objects,
                     client_email_addresses, params)

    # let the webapp know about the uploads:
    update_webapp_database(bucket_obj, uploaded_objects,
                           client_email_addresses, params)

    # get the total upload size
    # commented out since using rsync
    #upload_size = calculate_upload_size(uploaded_objects)
    #logging.info('upload size in GB: %s' % upload_size)

    # upload the metadata file:
    upload([os.path.join(project_dir, params['project_descriptor']), ],
           bucket_obj, '', params)

    # handle master metadata file
    update_project_mappings(driver, bucket_obj, client_email_addresses, params)

    # return the bucket name and the email addresses- need this information for the data retention process
    return bucket_obj.name, client_email_addresses
cwd = os.getcwd()
if os.path.basename(cwd) != "src":
    raise Exception(
        "CHANGE CURRENT WORKING DIRECTORY TO THE `src` PATH BEFORE RUNNING!!"
    )

# Load config_file_path from commandline input
args = parse_cl_args()
config_file_path = args.config_file
state_level = args.state_level
keywords_filter = args.keywords_filter

# Get config file object
config = parse_config_file(config_file_path)

# Initialize the Geo class and load lookup tables/dicts
g = Geo()
fip_lookup = g.load_fip_code_lookup()
state_lookup = g.load_state_abbrv_lookup(as_dict=True)

# Get base dir for county-level data and set data file paths
county_data_dir = config["PATHS"]["COUNTY_DATA_DIR"]
state_data_dir = config["PATHS"]["STATE_DATA_DIR"]
covid_data_dir = config["PATHS"]["COVID_DATA_DIR"]
intermediate_data_dir = config["PATHS"]["INTERMEDIATE_DATA_DIR"]

people_file_path = os.path.join(county_data_dir, config["FILES"]["COUNTY_PEOPLE"])
income_file_path = os.path.join(county_data_dir,
def setUp(self):
    # a basic dictionary to hold the config parameters that don't change
    # recall that we're mocking out the configuration file parsing process
    self.static_params = utils.parse_config_file('TRACKING')
    self.test_input_files = []

    # make a dummy bucket
    self.test_bucket = 'cccb-seq-tmp-testing'
    cmd = 'gsutil mb gs://%s' % self.test_bucket
    subprocess.Popen(cmd, shell=True)

    # need to sleep so the copy can work.
    time.sleep(5)

    # a dummy file to delete -- just copy this file
    cmd = 'gsutil cp %s gs://%s/' % (os.path.abspath(__file__), self.test_bucket)
    subprocess.Popen(cmd, shell=True)

    # a 'good' existing db where the expiration date is in the future, but falls on one of our reminder days
    # a second entry does NOT fall on any of the reminder days
    f1 = os.path.join(this_dir, 'test_db_file.db')
    self.test_input_files.append(f1)
    today = datetime.datetime.now()

    # set a target date that will trigger a notification
    target_expiration = today + datetime.timedelta(
        days=int(self.static_params['reminder_intervals'][1]))

    # set another target date that will NOT trigger a notification
    target_expiration_2 = today + datetime.timedelta(
        days=int(self.static_params['reminder_intervals'][1]) + 1)

    target_expiration_string = target_expiration.strftime(
        self.static_params['date_format'])
    target_expiration_string_2 = target_expiration_2.strftime(
        self.static_params['date_format'])

    with open(f1, 'w') as fout:
        fout.write('\t'.join([
            'abc-12345-678', self.test_bucket, '*****@*****.**',
            target_expiration_string
        ]))
        fout.write('\n')
        fout.write('\t'.join([
            'def', 'bucket-def', '*****@*****.**',
            target_expiration_string_2
        ]))

    f2 = os.path.join(this_dir, 'test_db_file2.db')
    self.test_input_files.append(f2)

    # set a target date that will trigger a notification to CCCB
    target_expiration = today

    # set another target date that will NOT trigger a notification
    target_expiration_2 = today + datetime.timedelta(
        days=int(self.static_params['reminder_intervals'][1]) + 1)

    target_expiration_string = target_expiration.strftime(
        self.static_params['date_format'])
    target_expiration_string_2 = target_expiration_2.strftime(
        self.static_params['date_format'])

    with open(f2, 'w') as fout:
        fout.write('\t'.join([
            'abc-12345-678', self.test_bucket, '*****@*****.**',
            target_expiration_string
        ]))
        fout.write('\n')
        fout.write('\t'.join([
            'def', 'bucket-def', '*****@*****.**',
            target_expiration_string_2
        ]))
        'The google project name.  If not given, there is a default (see -h)',
        default='cccb-data-delivery',
        dest='google_project')
    args = parser.parse_args()
    emails = [x.strip() for x in args.user_email_csv.split(',')]
    return (args.bucket_name, emails, args.google_project)


if __name__ == '__main__':

    # want the bucket, the emails, the google project
    bucket_name, users, google_project = parse_commandline_args()

    # update the param dict to include the cloud-specific parameters
    params = utils.parse_config_file('CLOUD')
    sys.stdout.write('Params parsed from config file: %s' % params)

    driver = get_connection_driver(params, google_project)
    bucket_obj = get_bucket_and_add_permission(bucket_name, driver, users)

    # give permissions:
    give_permissions(driver, bucket_obj, users)

    # get the objects in that bucket and keep their names in a list:
    objects = [
        x.name for x in bucket_obj.list_objects()
        if os.path.basename(x.name).split('.')[-1] != 'json'
    ]

    # update the web app's database
reduced_table["fips_code"] = full_fips # Reset the indices so everything is clean reduced_table.reset_index(inplace=True, drop=True) print("\t~~ Success") return reduced_table if __name__ == '__main__': # Parse config file from CL config_file = parse_cl_args() # Load config file config = parse_config_file(config_file) # Load raw data data_path = os.path.join(config["PATHS"]["MISC_DIR"], config["FILES"]["FIP_RAW"]) fips_source = load_raw_data(data_path) # Take only the columns we want reduced_table = get_reduced_table(fips_source) # Rename columns reduced_table = rename_columns(reduced_table) # Rename columns reduced_table = construct_full_fips_code(reduced_table)
def setUp(self):
    # a basic dictionary to hold the config parameters that don't change
    # recall that we're mocking out the configuration file parsing process
    """
    self.static_params = {}
    self.static_params['retention_period'] = '30'
    self.static_params['reminder_intervals'] = ['14','7','3']
    self.static_params['date_format'] = '%m/%d/%Y'
    """
    self.static_params = utils.parse_config_file('TRACKING')

    self.test_input_files = []

    # a 'good' existing db
    f1 = os.path.join(this_dir, 'test_db_file.db')
    self.test_input_files.append(f1)
    with open(f1, 'w') as fout:
        fout.write('\t'.join([
            'abc', 'bucket-abc', '[email protected],[email protected]', '03/13/2017'
        ]))

    # a db with a bad date
    f2 = os.path.join(this_dir, 'test_db_file2.db')
    self.test_input_files.append(f2)
    with open(f2, 'w') as fout:
        fout.write('\t'.join([
            'abc', 'bucket-abc', '[email protected],[email protected]', '13/13/2017'
        ]))

    # another 'good' existing db
    f3 = os.path.join(this_dir, 'test_db_file3.db')
    self.test_input_files.append(f3)
    with open(f3, 'w') as fout:
        fout.write('\t'.join([
            'abc', 'bucket-abc', '[email protected],[email protected]', '03/13/2017'
        ]))
        fout.write('\n')
        fout.write('\t'.join(
            ['def', 'bucket-def', '*****@*****.**', '03/15/2017']))

    # a 'good' existing db where the expiration date is in the future, but falls on one of our reminder days
    # a second entry does NOT fall on any of the reminder days
    f4 = os.path.join(this_dir, 'test_db_file4.db')
    self.test_input_files.append(f4)
    today = datetime.datetime.now()
    target_expiration = today + datetime.timedelta(
        days=int(self.static_params['reminder_intervals'][1]))
    target_expiration_2 = today + datetime.timedelta(
        days=int(self.static_params['reminder_intervals'][1]) + 1)
    target_expiration_string = target_expiration.strftime(
        self.static_params['date_format'])
    target_expiration_string_2 = target_expiration_2.strftime(
        self.static_params['date_format'])
    with open(f4, 'w') as fout:
        fout.write('\t'.join([
            'abc', 'bucket-abc', '[email protected],[email protected]',
            target_expiration_string
        ]))
        fout.write('\n')
        fout.write('\t'.join([
            'def', 'bucket-def', '*****@*****.**', target_expiration_string_2
        ]))

    # a 'good' existing db where the expiration date is in the future, but falls on one of our reminder days
    # here there are two projects which expire
    f5 = os.path.join(this_dir, 'test_db_file5.db')
    self.test_input_files.append(f5)
    today = datetime.datetime.now()
    target_expiration = today + datetime.timedelta(
        days=int(self.static_params['reminder_intervals'][1]))
    target_expiration_string = target_expiration.strftime(
        self.static_params['date_format'])
    with open(f5, 'w') as fout:
        fout.write('\t'.join([
            'abc', 'bucket-abc', '[email protected],[email protected]',
            target_expiration_string
        ]))
        fout.write('\n')
        fout.write('\t'.join([
            'def', 'bucket-def', '*****@*****.**', target_expiration_string
        ]))

    # a 'good' existing db where the expiration date is in the future, but falls on one of our reminder days
    # a second entry falls on a different reminder day
    f6 = os.path.join(this_dir, 'test_db_file6.db')
    self.test_input_files.append(f6)
    today = datetime.datetime.now()
    target_expiration = today + datetime.timedelta(
        days=int(self.static_params['reminder_intervals'][1]))
    target_expiration_2 = today + datetime.timedelta(
        days=int(self.static_params['reminder_intervals'][0]))
    target_expiration_string = target_expiration.strftime(
        self.static_params['date_format'])
    target_expiration_string_2 = target_expiration_2.strftime(
        self.static_params['date_format'])
    with open(f6, 'w') as fout:
        fout.write('\t'.join([
            'abc', 'bucket-abc', '[email protected],[email protected]',
            target_expiration_string
        ]))
        fout.write('\n')
        fout.write('\t'.join([
            'def', 'bucket-def', '*****@*****.**', target_expiration_string_2
        ]))

    # a db where the first entry falls on one of our reminder days
    # and the second entry expires today
    f7 = os.path.join(this_dir, 'test_db_file7.db')
    self.test_input_files.append(f7)
    today = datetime.datetime.now()
    target_expiration = today + datetime.timedelta(
        days=int(self.static_params['reminder_intervals'][1]))
    target_expiration_2 = today
    target_expiration_string = target_expiration.strftime(
        self.static_params['date_format'])
    target_expiration_string_2 = target_expiration_2.strftime(
        self.static_params['date_format'])
    with open(f7, 'w') as fout:
        fout.write('\t'.join([
            'abc', 'bucket-abc', '[email protected],[email protected]',
            target_expiration_string
        ]))
        fout.write('\n')
        fout.write('\t'.join([
            'def', 'bucket-def', '*****@*****.**', target_expiration_string_2
        ]))

    # a db where the first entry expires today
    # and the second entry does NOT fall on any of the reminder days
    f8 = os.path.join(this_dir, 'test_db_file8.db')
    self.test_input_files.append(f8)
    today = datetime.datetime.now()
    target_expiration = today
    target_expiration_2 = today + datetime.timedelta(
        days=int(self.static_params['reminder_intervals'][0]) + 1)
    target_expiration_string = target_expiration.strftime(
        self.static_params['date_format'])
    target_expiration_string_2 = target_expiration_2.strftime(
        self.static_params['date_format'])
    with open(f8, 'w') as fout:
        fout.write('\t'.join([
            'abc', 'bucket-abc', '[email protected],[email protected]',
            target_expiration_string
        ]))
        fout.write('\n')
        fout.write('\t'.join([
            'def', 'bucket-def', '*****@*****.**', target_expiration_string_2
        ]))