Example #1
def main(args):
    path_config, subset_selection_config, simple_training_config, advanced_training_config = \
        utils.parse_config_file(args.config_file_name)

    utils.init_gpu(memory_limit=args.gpu_memory_limit)

    (train_images, train_labels), (test_images, test_labels) = load_subset_database(args, path_config,
                                                                                    subset_selection_config)
    assert len(train_images) == len(train_labels)
    assert len(test_images) == len(test_labels)
    logging.info(f'Number of train samples: {len(train_images)}')
    logging.info(f'Number of test samples: {len(test_images)}')

    if args.scenario == 'experimental':
        train_experimental_model(train_images, train_labels, test_images, test_labels)

    elif args.scenario == 'simple':
        train_basic_model(args, path_config, simple_training_config,
                          train_images, train_labels, test_images, test_labels)

    elif args.scenario == 'advanced':
        train_advanced_model(args, path_config, advanced_training_config,
                             train_images, train_labels, test_images, test_labels)

    else:
        assert False, f'Unhandled scenario configuration: {args.scenario}'
Example #2
	def parse_config_file(self):
		"""
		This function calls out to the configuration parser, which supplies the various constants used to run this pipeline
		"""
		target_section = 'DEMUX'
		logging.info('Looking to parse the [%s] section from a configuration file.' % target_section)
		self.config_params_dict = utils.parse_config_file(target_section)
		logging.info('Parameters parsed from configuration file: ')
		logging.info(self.config_params_dict)
    def setUp(self):

        # a basic dictionary to hold the config parameters that don't change
        # recall that we're mocking out the configuration file parsing process
        """
		self.static_params = {}
		self.static_params['retention_period'] = '30'
		self.static_params['reminder_intervals'] = ['14','7','3']
		self.static_params['date_format'] = '%m/%d/%Y'
		"""
        self.static_params = utils.parse_config_file('TRACKING')

        self.test_input_files = []

        # an empty file
        f2 = os.path.join(this_dir, 'test_db_file2.db')
        self.test_input_files.append(f2)
        with open(f2, 'w') as fout:
            fout.write('\n')

        # malformed date
        f3 = os.path.join(this_dir, 'test_db_file3.db')
        self.test_input_files.append(f3)
        with open(f3, 'w') as fout:
            fout.write('\t'.join([
                'abc', 'bucket-abc', '[email protected],[email protected]', '3/13/17'
            ]))

        # mismatched bucket names for the same project
        f4 = os.path.join(this_dir, 'test_db_file4.db')
        self.test_input_files.append(f4)
        with open(f4, 'w') as fout:
            fout.write('\t'.join([
                'abc', 'bucket-abc', '[email protected],[email protected]',
                '03/13/2017'
            ]))

        # a 'good' existing db
        f5 = os.path.join(this_dir, 'test_db_file5.db')
        self.test_input_files.append(f5)
        with open(f5, 'w') as fout:
            fout.write('\t'.join([
                'abc', 'bucket-abc', '[email protected],[email protected]',
                '03/13/2017'
            ]))

        # a 'good' existing db
        f6 = os.path.join(this_dir, 'test_db_file6.db')
        self.test_input_files.append(f6)
        with open(f6, 'w') as fout:
            fout.write('\t'.join([
                'abc', 'bucket-abc', '[email protected],[email protected]',
                '03/13/2017'
            ]))
def main():

    # load some config parameters
    params = utils.parse_config_file('TRACKING')

    # look in the various places to determine that all database files are consistent
    db_collection = parse_all_db_files(params)

    # get the database contents (see function for details)
    db_contents = ensure_consistency(db_collection)

    # scan the contents, perform the appropriate actions
    scan_db(db_contents, params)
Example #5
def main():
    now = time.time()
    config = utils.parse_config_file("config/config.yaml")
    response = requests.get(URL, params=dict(
        APPID=config['openweathermap_credentials']['api_key'],
        q="Dublin,IE"
    ))
    new_data = {obj['dt']: obj for obj in response.json()['list']}
    s3 = boto3.client('s3', **config['aws_credentials'])
    tmp_data = json.loads(s3.get_object(Bucket='ww-scraper', Key=FILE_KEY)['Body'].read())
    current_data, deleted_dts = delete_old_dts(tmp_data, now)
    new_dts, updated_dts = merge_new_data_into_current_data(current_data, new_data, now)
    print("New     dts: %s" % (new_dts,))
    print("Deleted dts: %s" % (deleted_dts,))
    print("Updated dts: %s" % (updated_dts,))
    s3.put_object(Bucket='ww-scraper', Key=FILE_KEY, Body=json.dumps(current_data, indent=4))
Example #6
def parse_args():
    """Read and parse options from command line
	Initialize logging

	Return values:
	options: options and arguments parsed from command line
	confs: configuration file dictionary information
	"""
    usage = """usage: %prog [options]
Crawl pages from the web according to specific URL patterns."""

    parser = optparse.OptionParser(usage=usage, version='%prog 1.0.0.0')
    parser.add_option('-c',
                      dest='filename',
                      help='read config file',
                      metavar='FILE')

    parser.add_option('-l',
                      '--log',
                      dest='log',
                      action='store_true',
                      help='start logging',
                      default=False)

    options, args = parser.parse_args()
    if options.filename is None:
        print(parser.format_help())
        parser.exit()
    else:
        confs = utils.parse_config_file(options.filename)
        if options.log:
            log.read_config_file(confs['log']['log_config_file'])
            log.install(confs['log']['log_name'])
        else:
            log.uninstall()

        return options, confs
def main(project_mapping):
	"""
	project_mapping is a two-level nested dict.
	The first level's keys are the iLab project IDs and each one maps to a dict
	Each 'second level' dict has a bucket and client_emails key, which give the bucket name gs://<bucket name>
	and a list of emails, respectively
	"""

	logging.info('In cloud tracking module')
	logging.info('Project mapping: %s' % project_mapping)

	# get some configuration parameters
	params = utils.parse_config_file('TRACKING')
	
	# need to cleanup some of the parameters:
	try:
		params['retention_period'] = int(params['retention_period'])
		logging.info('Params read from config: %s' % params)
		logging.info('Retention period set to %s days' % params['retention_period'])
	except (KeyError, ValueError, TypeError):
		logging.error('Could not interpret one of the configuration parameters correctly.  Check that the intended data types match those in the config file')
		sys.exit(1)		

	# set the expiration date
	target_date = datetime.datetime.now() + datetime.timedelta(days=params['retention_period'])

	# read the database file
	this_dir = os.path.dirname(os.path.realpath(__file__))
	params['data_retention_db'] = os.path.join(this_dir, params['data_retention_db'])
	if os.path.isfile(params['data_retention_db']):
		logging.info('About to parse database file')
		project_database = utils.load_database(params['data_retention_db'], params)
		logging.info('Parsed from the database: %s' % project_database)
	else:
		logging.error('Could not find a database file at %s' % params['data_retention_db'])
		raise MissingPrimaryDatabaseException('The primary database file is missing.  Fix that.')

	for project_id, info_dict in project_mapping.items():

		logging.info('Checking project with iLab ID: %s' % project_id)
		# perhaps we have an ongoing project- then a bucket for this iLab ID probably already exists
		if project_id in project_database:

			logging.info('project with ID %s was already in our database. Plan to update the deletion date' % project_id)
			# get the info we have about this in our database
			db_entry = project_database[project_id]
			
			# ensure the bucket names match.  If they do, simply update the retention target date and the email contacts
			if info_dict['bucket'] == db_entry['bucket']:
				logging.info('The delivery buckets matched, as expected')
				logging.info('Changing deletion date from %s to %s' % (db_entry['target_date'].strftime(params['date_format']), target_date.strftime(params['date_format'])))
				db_entry['target_date'] = target_date
				existing_emails = set(db_entry['client_emails'])
				new_emails = set(info_dict['client_emails'])
				total_emails = existing_emails.union(new_emails)
				logging.info('Original emails were: %s' % existing_emails)
				logging.info('New emails were: %s' % new_emails)
				logging.info('The union of those sets of emails is %s' % total_emails)
				db_entry['client_emails'] = list(total_emails)
			else:
				# somehow the same iLab project was placed into a different bucket.  Shouldn't happen, so raise an exception.
				# We retain a 1-to-1 mapping between iLab IDs and buckets.  Maybe later we change this behavior based on a particular use case.
				logging.error('The bucket name did not match that of a prior project with the same iLab ID.  This should not happen.')
				logging.error('The bucket found in the database was: %s' % db_entry['bucket'])
				logging.error('The bucket that the demux was just uploaded to was: %s' % info_dict['bucket'])
				raise MultipleBucketsForSameProjectException('The iLab IDs were the same, but the bucket was somehow different.  Someone needs to check this!')
				#TODO- send a message for someone to fix it.

		else:
			logging.info('A new project will be added to the database.')
			logging.info('Updating info dict. Before: %s, then adding target date %s' % (info_dict, target_date))
			info_dict.update({'target_date': target_date})
			project_database[project_id] = info_dict

	logging.info('Project database: %s' % project_database)
				
	# now write to the database file:
	utils.write_to_db(project_database, params)
Example #8
def convert(config_path,
            weights_path,
            output_path,
            remove_batch_normalization=False,
            save_keras=True,
            save_tfjs=False,
            plot_model=False):

    model_name = os.path.splitext(config_path)[0]
    print('Parsing darknet config...')
    unique_config_file = parse_config_file(config_path)
    cfg_parser = configparser.ConfigParser()
    cfg_parser.read_file(unique_config_file)

    print('Loading weights...')
    weights_file = open(weights_path, 'rb')
    major, minor, revision = np.ndarray(shape=(3, ),
                                        dtype='int32',
                                        buffer=weights_file.read(12))
    if (major * 10 + minor) >= 2 and major < 1000 and minor < 1000:
        seen = np.ndarray(shape=(1, ),
                          dtype='int64',
                          buffer=weights_file.read(8))
    else:
        seen = np.ndarray(shape=(1, ),
                          dtype='int32',
                          buffer=weights_file.read(4))
    print('Weights Header: ', major, minor, revision, seen)

    input_layer = Input(shape=(None, None, 3))
    prev_layer = input_layer
    all_layers = []
    weight_decay = float(cfg_parser['net_0']['decay']
                         ) if 'net_0' in cfg_parser.sections() else 5e-4

    weight_count = 0  # running count of weights read from the file
    out_index = []

    for section in cfg_parser.sections():
        print('Parsing section {}'.format(section))

        if section.startswith('convolutional'):
            filters = int(cfg_parser[section]['filters'])
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            pad = int(cfg_parser[section]['pad'])
            activation = cfg_parser[section]['activation']
            batch_normalize = 'batch_normalize' in cfg_parser[section]

            padding = 'same' if pad == 1 and stride == 1 else 'valid'

            # Setting weights.
            # Darknet serializes convolutional weights as:
            # [bias/beta, [gamma, mean, variance], conv_weights]
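            # In float32 bytes that means: filters*4 for the bias/beta values,
            # then (only if batch-normalized) filters*12 for [gamma, mean, variance],
            # and finally prod(weights_shape)*4 for the convolution kernel itself.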
            prev_layer_shape = K.int_shape(prev_layer)
            weights_shape = (size, size, prev_layer_shape[-1], filters)
            darknet_w_shape = (filters, weights_shape[2], size, size)
            weights_size = np.prod(weights_shape)

            conv_bias = np.ndarray(shape=(filters, ),
                                   dtype='float32',
                                   buffer=weights_file.read(filters * 4))
            weight_count += filters

            if batch_normalize:
                bn_weights = np.ndarray(shape=(3, filters),
                                        dtype='float32',
                                        buffer=weights_file.read(filters * 12))
                weight_count += 3 * filters
                # order: [gamma (scale), beta (shift), running mean, running variance]
                bn_weight_list = [
                    bn_weights[0], conv_bias, bn_weights[1], bn_weights[2]
                ]

            conv_weights = np.ndarray(shape=darknet_w_shape,
                                      dtype='float32',
                                      buffer=weights_file.read(weights_size *
                                                               4))
            weight_count += weights_size

            # Darknet conv_weights are serialized Caffe-style:
            # (out_dim, in_dim, height, width)
            # we would like to set these to Tensorflow order:
            # (height, width, in_dim, out_dim)
            conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])

            if remove_batch_normalization:
                if batch_normalize:
                    conv_layer_weights = fold_batch_norm_layer(
                        conv_weights, bn_weight_list)

                else:
                    conv_layer_weights = [conv_weights, conv_bias]
                use_bias = True
            else:
                if batch_normalize:
                    conv_layer_weights = [conv_weights]

                else:
                    conv_layer_weights = [conv_weights, conv_bias]
                use_bias = not batch_normalize

            #########

            # Create Conv2D layer
            if stride > 1:
                # Darknet uses left and top padding instead of 'same' mode
                prev_layer = ZeroPadding2D(((1, 0), (1, 0)))(prev_layer)

            # Handle activation.
            act_fn = None
            if activation == 'leaky':
                pass  # Add advanced activation later.
            elif activation != 'linear':
                raise ValueError(
                    'Unknown activation function `{}` in section {}'.format(
                        activation, section))

            conv_layer = (Conv2D(filters, (size, size),
                                 strides=(stride, stride),
                                 kernel_regularizer=l2(weight_decay),
                                 use_bias=use_bias,
                                 weights=conv_layer_weights,
                                 activation=act_fn,
                                 padding=padding))(prev_layer)
            if not remove_batch_normalization and batch_normalize:
                conv_layer = (BatchNormalization(
                    weights=bn_weight_list))(conv_layer)

            prev_layer = conv_layer
            ###

            if activation == 'leaky':
                prev_layer = LeakyReLU(alpha=0.1)(prev_layer)

            all_layers.append(prev_layer)

        elif section.startswith('route'):
            ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
            layers = [all_layers[i] for i in ids]
            if len(layers) > 1:
                concatenate_layer = Concatenate()(layers)
                all_layers.append(concatenate_layer)
                prev_layer = concatenate_layer
            else:
                skip_layer = layers[0]  # only one layer to route
                all_layers.append(skip_layer)
                prev_layer = skip_layer

        elif section.startswith('maxpool'):
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            all_layers.append(
                MaxPooling2D(pool_size=(size, size),
                             strides=(stride, stride),
                             padding='same')(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('shortcut'):
            index = int(cfg_parser[section]['from'])
            activation = cfg_parser[section]['activation']
            assert activation == 'linear', 'Only linear activation supported.'
            all_layers.append(Add()([all_layers[index], prev_layer]))
            prev_layer = all_layers[-1]

        elif section.startswith('reorg'):
            block_size = int(cfg_parser[section]['stride'])
            assert block_size == 2, 'Only reorg with stride 2 supported.'
            all_layers.append(
                Lambda(reorg, output_shape=reorg_shape,
                       name=str(section))(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('upsample'):
            stride = int(cfg_parser[section]['stride'])
            assert stride == 2, 'Only stride=2 supported.'
            all_layers.append(UpSampling2D(stride)(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('softmax'):
            all_layers.append(Activation("softmax")(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('avgpool'):
            all_layers.append(GlobalAveragePooling2D()(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('yolo') or section.startswith('region'):
            out_index.append(len(all_layers) - 1)
            all_layers.append(None)
            prev_layer = all_layers[-1]
            anchors = cfg_parser[section]['anchors']

        elif section.startswith('net'):
            pass

        else:
            raise ValueError(
                'Unsupported section header type: {}'.format(section))

    # Check to see if all weights have been read.
    remaining_weights = len(weights_file.read()) / 4
    weights_file.close()
    print('Read {} of {} from Darknet weights.'.format(
        weight_count, weight_count + remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))

    # Create and save model.
    if not out_index:
        out_index.append(len(all_layers) - 1)
    input_l = input_layer
    out_l = [all_layers[i] for i in out_index]

    model = Model(inputs=input_l, outputs=out_l)
    print(model.summary())

    if remove_batch_normalization:
        model_save_path = '{}/{}_nobn.h5'.format(output_path, model_name)
        model_summary_path = '{}/{}_nobn_summary.txt'.format(
            output_path, model_name)
        tfjs_path = '{}/{}-tfjs_nobn/'.format(output_path, model_name)
        plot_path = '{}/{}_nobn.png'.format(output_path, model_name)
        anchors_path = '{}/{}_nobn_anchors.txt'.format(output_path, model_name)

    else:
        model_save_path = '{}/{}.h5'.format(output_path, model_name)
        model_summary_path = '{}/{}_summary.txt'.format(
            output_path, model_name)
        tfjs_path = '{}/{}-tfjs/'.format(output_path, model_name)
        plot_path = '{}/{}.png'.format(output_path, model_name)
        anchors_path = '{}/{}_anchors.txt'.format(output_path, model_name)

    with open(anchors_path, 'w') as f:
        print(anchors, file=f)
        print('Saved anchors to {}'.format(anchors_path))

    with open(model_summary_path, 'w') as f:
        model.summary(print_fn=lambda x: f.write(x + '\n'))
        print('Saved model summary to {}'.format(model_summary_path))

    if save_keras:
        model.save(model_save_path)
        print('Saved Keras model to {}'.format(model_save_path))

    if save_tfjs:
        import tensorflowjs as tfjs
        # tfjs.converters.save_keras_model(model, tfjs_path, quantization_dtype=np.uint8)
        tfjs.converters.save_keras_model(model, tfjs_path)
        print('Saved Tensorflowjs model to {}'.format(tfjs_path))

    if plot_model:
        from keras.utils.vis_utils import plot_model as plot
        plot(model, to_file=plot_path, show_shapes=True)
        print('Saved image plot to {}'.format(plot_path))
Example #9
    print("[Evaluator]  Initializing at %d training steps:" %
          crt_training_step)
    agent = eval_agent

    eval_env.get_crt_step(crt_training_step)
    agent.policy_evaluation.policy.load_state_dict(policy.state_dict())
    preprocess = Preprocessor(cmdl.env_class).transform

    step_cnt = 0
    o, r, done = eval_env.reset(), 0, False
    while step_cnt < cmdl.evaluator.eval_steps:
        s = preprocess(o)
        a = agent.evaluate_policy(s)
        o, r, done, _ = eval_env.step(a)
        step_cnt += 1
        if done:
            o, r, done = eval_env.reset(), 0, False


if __name__ == "__main__":

    # Parse cmdl args for the config file and return config as Namespace
    config = utils.parse_config_file(utils.parse_cmd_args())

    # Assuming everything in the config is deterministic already.
    torch.manual_seed(config.seed)
    numpy.random.seed(config.seed)

    # Let's do this!
    train_agent(config)
Example #10
    print("--------- Training ----------")
    print("N-step       : %d" % config.agent.n_horizon)
    print("Optim Freq   : %d" % config.agent.update_freq)
    print("Batch        : %d" % config.agent.batch_size)
    print("Fast Lr      : %.6f" % config.agent.fast_lr)
    print("Slow Lr      : %.6f" % config.agent.slow_lr)
    print("-----------------------------")
    print("stp, nst, act  |  return")
    print("-----------------------------")


def display_stats(ep_cnt, step_cnt, elapsed_time):
    fps = step_cnt / elapsed_time
    print(
        clr(
            "[  %s   ] finished after %d eps, %d steps." %
            ("Main", ep_cnt, step_cnt), 'white', 'on_magenta'))
    print(
        clr(
            "[  %s   ] finished after %.2fs, %.2ffps." %
            ("Main", elapsed_time, fps), 'white', 'on_magenta'))


if __name__ == "__main__":
    cmd_args = utils.parse_cmd_args()
    config = utils.parse_config_file(cmd_args)
    torch.manual_seed(config.seed)
    numpy.random.seed(config.seed)

    train_agent(config)
Example #11
def entry_method(project_dir, params):
    """
    project_dir is a string giving the full path to the project directory
    params is a dictionary of configuration parameters.
    """

    # update the param dict to include the cloud-specific parameters
    cloud_specific_params = utils.parse_config_file('CLOUD')
    params.update(cloud_specific_params)
    logging.info('Updated params in cloud upload script: %s' % params)

    # correct the fastQC directory suffix name
    params['fastqc_dir_tag'] = params['final_fastq_tag'] + params[
        'fastqc_dir_suffix']

    params['fastq_file_suffix'] = '.' + params['final_fastq_tag'] + '.fastq.gz'

    client_email_addresses = get_client_emails(project_dir, params)
    ilab_id = os.path.basename(project_dir).replace('_', '-').lower()

    driver = get_connection_driver(params)
    bucket_obj = get_or_create_bucket(ilab_id, driver, client_email_addresses,
                                      params)

    #TODO: collect lane-specific fastq

    fastq_files = collect_files(project_dir, 'fastq_file_suffix', params)
    logging.info('Found the following fastq files:')
    logging.info('\n'.join(fastq_files))
    fastQC_dirs = collect_files(project_dir, 'fastqc_dir_tag', params)
    logging.info('Found the following fastQC report directories:')
    logging.info('\n'.join(fastQC_dirs))
    #for fqd in fastQC_dirs:
    #	prep_fastqc_files(fqd, bucket_obj.name, params)

    # zip up the fastQC directories
    zipfile = zip_fastqc_reports(fastQC_dirs, project_dir, params)

    # do uploads
    uploaded_objects = []
    uploaded_objects.extend(
        upload_fastq_dir(fastq_files, project_dir, bucket_obj,
                         params['cloud_fastq_root'], params))
    #uploaded_objects.extend(upload(fastq_files, bucket_obj, params['cloud_fastq_root'], params))
    uploaded_objects.extend(
        upload([
            zipfile,
        ], bucket_obj, params['cloud_fastqc_root'], params))

    # give permissions:
    give_permissions(driver, bucket_obj, uploaded_objects,
                     client_email_addresses, params)

    # let the webapp know about the uploads:
    update_webapp_database(bucket_obj, uploaded_objects,
                           client_email_addresses, params)

    # get the total upload size
    # commented out since using rsync
    #upload_size = calculate_upload_size(uploaded_objects)
    #logging.info('upload size in GB: %s' % upload_size)

    # upload the metadata file:
    upload([
        os.path.join(project_dir, params['project_descriptor']),
    ], bucket_obj, '', params)

    # handle master metadata file
    update_project_mappings(driver, bucket_obj, client_email_addresses, params)

    # return the bucket name and the email addresses; this information is needed for the data retention process
    return bucket_obj.name, client_email_addresses
    cwd = os.getcwd()

    if os.path.basename(cwd) != "src":
        raise Exception(
            "CHANGE CURRENT WORKING DIRECTORY TO THE `src` PATH BEFORE RUNNING!!"
        )

    # Load config_file_path from commandline input
    args = parse_cl_args()
    config_file_path = args.config_file

    state_level = args.state_level
    keywords_filter = args.keywords_filter

    # Get config file object
    config = parse_config_file(config_file_path)

    # Initialize the Geo class and load lookup tables/dicts
    g = Geo()
    fip_lookup = g.load_fip_code_lookup()
    state_lookup = g.load_state_abbrv_lookup(as_dict=True)

    # Get base dir for county-level data and set data file paths
    county_data_dir = config["PATHS"]["COUNTY_DATA_DIR"]
    state_data_dir = config["PATHS"]["STATE_DATA_DIR"]
    covid_data_dir = config["PATHS"]["COVID_DATA_DIR"]
    intermediate_data_dir = config["PATHS"]["INTERMEDIATE_DATA_DIR"]

    people_file_path = os.path.join(county_data_dir,
                                    config["FILES"]["COUNTY_PEOPLE"])
    income_file_path = os.path.join(county_data_dir,
Example #13
    def setUp(self):

        # a basic dictionary to hold the config parameters that don't change
        # recall that we're mocking out the configuration file parsing process
        self.static_params = utils.parse_config_file('TRACKING')

        self.test_input_files = []

        # make a dummy bucket
        self.test_bucket = 'cccb-seq-tmp-testing'
        cmd = 'gsutil mb gs://%s' % self.test_bucket
        subprocess.Popen(cmd, shell=True)

        # need to sleep so the copy can work.
        time.sleep(5)

        # a dummy file to delete -- just copy this file
        cmd = 'gsutil cp %s gs://%s/' % (os.path.abspath(__file__),
                                         self.test_bucket)
        subprocess.Popen(cmd, shell=True)

        # a 'good' existing db where the expiration date is in the future, but falls on one of our reminder days
        # a second entry does NOT fall on any of the reminder days
        f1 = os.path.join(this_dir, 'test_db_file.db')
        self.test_input_files.append(f1)
        today = datetime.datetime.now()

        # set a target date that will trigger a notification
        target_expiration = today + datetime.timedelta(
            days=int(self.static_params['reminder_intervals'][1]))

        # set another target date that will NOT trigger a notification
        target_expiration_2 = today + datetime.timedelta(
            days=int(self.static_params['reminder_intervals'][1]) + 1)

        target_expiration_string = target_expiration.strftime(
            self.static_params['date_format'])
        target_expiration_string_2 = target_expiration_2.strftime(
            self.static_params['date_format'])

        with open(f1, 'w') as fout:
            fout.write('\t'.join([
                'abc-12345-678', self.test_bucket, '*****@*****.**',
                target_expiration_string
            ]))
            fout.write('\n')
            fout.write('\t'.join([
                'def', 'bucket-def', '*****@*****.**',
                target_expiration_string_2
            ]))

        f2 = os.path.join(this_dir, 'test_db_file2.db')
        self.test_input_files.append(f2)

        # set a target date that will trigger a notification to CCCB
        target_expiration = today

        # set another target date that will NOT trigger a notification
        target_expiration_2 = today + datetime.timedelta(
            days=int(self.static_params['reminder_intervals'][1]) + 1)

        target_expiration_string = target_expiration.strftime(
            self.static_params['date_format'])
        target_expiration_string_2 = target_expiration_2.strftime(
            self.static_params['date_format'])

        with open(f2, 'w') as fout:
            fout.write('\t'.join([
                'abc-12345-678', self.test_bucket, '*****@*****.**',
                target_expiration_string
            ]))
            fout.write('\n')
            fout.write('\t'.join([
                'def', 'bucket-def', '*****@*****.**',
                target_expiration_string_2
            ]))
Example #14
        'The google project name.  If not given, there is a default (see -h)',
        default='cccb-data-delivery',
        dest='google_project')

    args = parser.parse_args()
    emails = [x.strip() for x in args.user_email_csv.split(',')]
    return (args.bucket_name, emails, args.google_project)


if __name__ == '__main__':

    # want the bucket, the emails, the google project
    bucket_name, users, google_project = parse_commandline_args()

    # update the param dict to include the cloud-specific parameters
    params = utils.parse_config_file('CLOUD')
    sys.stdout.write('Params parsed from config file: %s\n' % params)

    driver = get_connection_driver(params, google_project)
    bucket_obj = get_bucket_and_add_permission(bucket_name, driver, users)

    # give permissions:
    give_permissions(driver, bucket_obj, users)

    # get the objects in that bucket and keeps their names in a list:
    objects = [
        x.name for x in bucket_obj.list_objects()
        if os.path.basename(x.name).split('.')[-1] != 'json'
    ]

    # update the web app's database
    reduced_table["fips_code"] = full_fips

    # Reset the indices so everything is clean
    reduced_table.reset_index(inplace=True, drop=True)

    print("\t~~ Success")
    return reduced_table


if __name__ == '__main__':

    # Parse config file from CL
    config_file = parse_cl_args()

    # Load config file
    config = parse_config_file(config_file)

    # Load raw data
    data_path = os.path.join(config["PATHS"]["MISC_DIR"],
                             config["FILES"]["FIP_RAW"])
    fips_source = load_raw_data(data_path)

    # Take only the columns we want
    reduced_table = get_reduced_table(fips_source)

    # Rename columns
    reduced_table = rename_columns(reduced_table)

    # Construct the full FIPS code
    reduced_table = construct_full_fips_code(reduced_table)
Example #16
    def setUp(self):

        # a basic dictionary to hold the config parameters that don't change
        # recall that we're mocking out the configuration file parsing process
        """
		self.static_params = {}
		self.static_params['retention_period'] = '30'
		self.static_params['reminder_intervals'] = ['14','7','3']
		self.static_params['date_format'] = '%m/%d/%Y'
		"""
        self.static_params = utils.parse_config_file('TRACKING')

        self.test_input_files = []

        # a 'good' existing db
        f1 = os.path.join(this_dir, 'test_db_file.db')
        self.test_input_files.append(f1)
        with open(f1, 'w') as fout:
            fout.write('\t'.join([
                'abc', 'bucket-abc', '[email protected],[email protected]',
                '03/13/2017'
            ]))

        # a db with a bad date
        f2 = os.path.join(this_dir, 'test_db_file2.db')
        self.test_input_files.append(f2)
        with open(f2, 'w') as fout:
            fout.write('\t'.join([
                'abc', 'bucket-abc', '[email protected],[email protected]',
                '13/13/2017'
            ]))

        # another 'good' existing db
        f3 = os.path.join(this_dir, 'test_db_file3.db')
        self.test_input_files.append(f3)
        with open(f3, 'w') as fout:
            fout.write('\t'.join([
                'abc', 'bucket-abc', '[email protected],[email protected]',
                '03/13/2017'
            ]))
            fout.write('\n')
            fout.write('\t'.join(
                ['def', 'bucket-def', '*****@*****.**', '03/15/2017']))

        # a 'good' existing db where the expiration date is in the future, but falls on one of our reminder days
        # a second entry does NOT fall on any of the reminder days
        f4 = os.path.join(this_dir, 'test_db_file4.db')
        self.test_input_files.append(f4)
        today = datetime.datetime.now()
        target_expiration = today + datetime.timedelta(
            days=int(self.static_params['reminder_intervals'][1]))
        target_expiration_2 = today + datetime.timedelta(
            days=int(self.static_params['reminder_intervals'][1]) + 1)
        target_expiration_string = target_expiration.strftime(
            self.static_params['date_format'])
        target_expiration_string_2 = target_expiration_2.strftime(
            self.static_params['date_format'])
        with open(f4, 'w') as fout:
            fout.write('\t'.join([
                'abc', 'bucket-abc', '[email protected],[email protected]',
                target_expiration_string
            ]))
            fout.write('\n')
            fout.write('\t'.join([
                'def', 'bucket-def', '*****@*****.**',
                target_expiration_string_2
            ]))

        # a 'good' existing db where the expiration date is in the future, but falls on one of our reminder days
        # here there are two projects which expire
        f5 = os.path.join(this_dir, 'test_db_file5.db')
        self.test_input_files.append(f5)
        today = datetime.datetime.now()
        target_expiration = today + datetime.timedelta(
            days=int(self.static_params['reminder_intervals'][1]))
        target_expiration_string = target_expiration.strftime(
            self.static_params['date_format'])
        with open(f5, 'w') as fout:
            fout.write('\t'.join([
                'abc', 'bucket-abc', '[email protected],[email protected]',
                target_expiration_string
            ]))
            fout.write('\n')
            fout.write('\t'.join([
                'def', 'bucket-def', '*****@*****.**', target_expiration_string
            ]))

        # a 'good' existing db where the expiration date is in the future, but falls on one of our reminder days
        # a second entry falls on a different reminder day
        f6 = os.path.join(this_dir, 'test_db_file6.db')
        self.test_input_files.append(f6)
        today = datetime.datetime.now()
        target_expiration = today + datetime.timedelta(
            days=int(self.static_params['reminder_intervals'][1]))
        target_expiration_2 = today + datetime.timedelta(
            days=int(self.static_params['reminder_intervals'][0]))
        target_expiration_string = target_expiration.strftime(
            self.static_params['date_format'])
        target_expiration_string_2 = target_expiration_2.strftime(
            self.static_params['date_format'])
        with open(f6, 'w') as fout:
            fout.write('\t'.join([
                'abc', 'bucket-abc', '[email protected],[email protected]',
                target_expiration_string
            ]))
            fout.write('\n')
            fout.write('\t'.join([
                'def', 'bucket-def', '*****@*****.**',
                target_expiration_string_2
            ]))

        # a 'good' existing db where the first entry's expiration is in the future and falls on one of our reminder days
        # the second entry expires today
        f7 = os.path.join(this_dir, 'test_db_file7.db')
        self.test_input_files.append(f7)
        today = datetime.datetime.now()
        target_expiration = today + datetime.timedelta(
            days=int(self.static_params['reminder_intervals'][1]))
        target_expiration_2 = today
        target_expiration_string = target_expiration.strftime(
            self.static_params['date_format'])
        target_expiration_string_2 = target_expiration_2.strftime(
            self.static_params['date_format'])
        with open(f7, 'w') as fout:
            fout.write('\t'.join([
                'abc', 'bucket-abc', '[email protected],[email protected]',
                target_expiration_string
            ]))
            fout.write('\n')
            fout.write('\t'.join([
                'def', 'bucket-def', '*****@*****.**',
                target_expiration_string_2
            ]))

        # a 'good' existing db where the first entry expires today
        # the second entry does not fall on any of the reminder days
        f8 = os.path.join(this_dir, 'test_db_file8.db')
        self.test_input_files.append(f8)
        today = datetime.datetime.now()
        target_expiration = today
        target_expiration_2 = today + datetime.timedelta(
            days=int(self.static_params['reminder_intervals'][0]) + 1)
        target_expiration_string = target_expiration.strftime(
            self.static_params['date_format'])
        target_expiration_string_2 = target_expiration_2.strftime(
            self.static_params['date_format'])
        with open(f8, 'w') as fout:
            fout.write('\t'.join([
                'abc', 'bucket-abc', '[email protected],[email protected]',
                target_expiration_string
            ]))
            fout.write('\n')
            fout.write('\t'.join([
                'def', 'bucket-def', '*****@*****.**',
                target_expiration_string_2
            ]))