def move_data_into_categorized_directories():
    """Sets up a temporary directory for categorized, preprocessed images.

    If ../temp/deep/categorized/ does not exist, then it is created and filled
    with preprocessed training and test images in the appropriate categories.
    Returns the directories for the train and test images."""
    dirname = "../temp/deep/categorized/"
    train_dirname = dirname + "train/"
    test_dirname = dirname + "test/"
    if util.ensure_directory_exists(dirname):
        logger.info("found categorized images in " + dirname + ", proceeding")
        return train_dirname, test_dirname
    logger.info(
        "couldn't find categorized images in {}, copying images now".format(
            dirname))
    util.ensure_directory_exists(train_dirname)
    util.ensure_directory_exists(test_dirname)
    for label, sublabels in util.cloud_kinds.items():
        logger.debug("processing {}".format(label))
        train_labeldirname = train_dirname + label + "/"
        test_labeldirname = test_dirname + label + "/"
        os.makedirs(train_labeldirname)
        os.makedirs(test_labeldirname)
        for cloud_type in sublabels:
            logger.debug("current cloud type: {}".format(cloud_type))
            sublabeldirname = "../data/" + cloud_type + "/"
            for filename in os.listdir(sublabeldirname):
                if random() > 0.2:
                    # Training image; crop and split into left and right half
                    img = util.cropImage(imread(sublabeldirname + filename))
                    width = img.shape[1]
                    imsave(train_labeldirname + "l-" + filename,
                           img[:, :(width // 2), :])
                    imsave(train_labeldirname + "r-" + filename,
                           img[:, (width // 2):, :])
                else:
                    # Test image, just crop
                    imsave(test_labeldirname + filename,
                           util.cropImage(imread(sublabeldirname + filename)))
    logger.info("finished copying images")
    return train_dirname, test_dirname

def replicate_pipelines(
    cfg_set,
    concourse_cfg,
    job_mapping,
    definitions_root_dir,
    template_path,
    template_include_dir,
    unpause_pipelines: bool = True,
    expose_pipelines: bool = True,
):
    ensure_directory_exists(definitions_root_dir)
    team_name = job_mapping.team_name()
    team_credentials = concourse_cfg.team_credentials(team_name)

    pipeline_names = set()
    for rendered_pipeline, _, pipeline_metadata in generate_pipelines(
        definitions_root_dir=definitions_root_dir,
        job_mapping=job_mapping,
        template_path=template_path,
        template_include_dir=template_include_dir,
        config_set=cfg_set,
    ):
        pipeline_name = pipeline_metadata.pipeline_name
        pipeline_names.add(pipeline_name)
        info('deploying pipeline {p} to team {t}'.format(p=pipeline_name, t=team_name))
        deploy_pipeline(
            pipeline_definition=rendered_pipeline,
            pipeline_name=pipeline_name,
            concourse_cfg=concourse_cfg,
            team_credentials=team_credentials,
            unpause_pipeline=unpause_pipelines,
            expose_pipeline=expose_pipelines,
        )

    concourse_api = client.ConcourseApi(
        base_url=concourse_cfg.external_url(),
        team_name=team_name,
    )
    concourse_api.login(
        team=team_name,
        username=team_credentials.username(),
        passwd=team_credentials.passwd(),
    )

    # remove pipelines that were not contained in job_mapping
    pipelines_to_remove = set(concourse_api.pipelines()) - pipeline_names
    for pipeline_name in pipelines_to_remove:
        info('removing pipeline: {p}'.format(p=pipeline_name))
        concourse_api.delete_pipeline(pipeline_name)

    # order pipelines alphabetically
    pipeline_names = list(concourse_api.pipelines())
    pipeline_names.sort()
    concourse_api.order_pipelines(pipeline_names)

def replicate_pipeline_definitions(
    definition_dir: str,
    cfg_dir: str,
    cfg_name: str,
):
    '''
    Replicates pipeline definitions from cc-pipelines to component repositories.
    Will only be required until definitions are moved to component repositories.
    '''
    util.ensure_directory_exists(definition_dir)
    util.ensure_directory_exists(cfg_dir)

    cfg_factory = ConfigFactory.from_cfg_dir(cfg_dir)
    cfg_set = cfg_factory.cfg_set(cfg_name)
    github_cfg = cfg_set.github()

    github = _create_github_api_object(github_cfg=github_cfg)

    repo_mappings = util.parse_yaml_file(os.path.join(definition_dir, '.repository_mapping'))

    for repo_path, definition_file in repo_mappings.items():
        # hack: definition_file is a list with always exactly one entry
        definition_file = util.ensure_file_exists(os.path.join(definition_dir, definition_file[0]))
        with open(definition_file) as f:
            definition_contents = f.read()

        repo_owner, repo_name = repo_path.split('/')

        helper = GitHubHelper(
            github=github,
            repository_owner=repo_owner,
            repository_name=repo_name,
        )

        # only do this for branch 'master' to avoid merge conflicts
        for branch_name in ['master']:  # branches(github_cfg, repo_owner, repo_name):
            util.info('Replicating pipeline-definition: {r}:{b}'.format(
                r=repo_path,
                b=branch_name,
            ))
            # create pipeline definition file in .ci/pipeline_definitions
            try:
                helper.create_or_update_file(
                    repository_branch=branch_name,
                    repository_version_file_path='.ci/pipeline_definitions',
                    file_contents=definition_contents,
                    commit_message="Import cc-pipeline definition",
                )
            except Exception:
                pass  # keep going

def determine_mail_recipients(src_dir, github_cfg_name):
    '''
    Returns a generator yielding all email addresses for the given (git) repository work tree.

    Email addresses are looked up:
    - from head commit: author and committer
    - from *CODEOWNERS files [0]

    Email addresses are not de-duplicated (this should be done by consumers).

    [0] https://help.github.com/articles/about-codeowners/
    '''
    cfg_factory = ctx().cfg_factory()
    github_cfg = cfg_factory.github(github_cfg_name)
    github_api = githubutil._create_github_api_object(github_cfg)

    # committer/author from head commit
    repo = git.Repo(ensure_directory_exists(src_dir))
    head_commit = repo.commit(repo.head)
    yield head_commit.author.email.lower()
    yield head_commit.committer.email.lower()

    # codeowners
    parser = CodeownersParser(repo_dir=src_dir)
    resolver = CodeOwnerEntryResolver(github_api=github_api)

    codeowner_entries = parser.parse_codeowners_entries()
    yield from resolver.resolve_email_addresses(codeowner_entries)

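# Hedged usage sketch (not part of the original module): the docstring above
# states that addresses are not de-duplicated, so a typical consumer collects
# the generator into a set first. The cfg name 'github_com' is a placeholder
# assumption, not a value taken from the source.
def _example_collect_recipients(src_dir):
    recipients = sorted(set(determine_mail_recipients(src_dir, 'github_com')))
    for address in recipients:
        print(address)
    return recipients
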
def get_logger():
    global _LOGGER  # pylint: disable=global-statement

    # Check if the logger has already been created
    if not _LOGGER:
        # Get a logger and name it
        logger = logging.getLogger('main')
        # Set the logging level for the current program
        logger.setLevel(logging.DEBUG)

        # Clean user input and make sure the directory exists
        log_dir = util.clean_directory_name(FLAGS.log_dir)  # pylint: disable=no-member
        util.ensure_directory_exists(log_dir)  # pylint: disable=no-member

        # Clean user input and get just the name
        if FLAGS.log_name.endswith('/'):
            log_name = os.path.dirname(FLAGS.log_name)
        else:
            log_name = os.path.basename(FLAGS.log_name)

        # Add the rotating file handler.
        _file_handler = logging.handlers.RotatingFileHandler(
            os.path.join(log_dir, log_name),
            maxBytes=FLAGS.log_size,
            backupCount=FLAGS.num_logs
        )
        _file_handler.setFormatter(logging.Formatter(
            fmt='%(asctime)s.%(msecs)03d %(message)s',
            datefmt='%Y_%m_%d %H:%M:%S'
        ))
        _file_handler.setLevel(logging.DEBUG)
        logger.addHandler(_file_handler)

        # Add the stdout stream handler.
        _handler = logging.StreamHandler(sys.stdout)
        _handler.setFormatter(logging.Formatter(fmt='%(message)s'))
        _handler.setLevel(FLAGS.log_level)
        logger.addHandler(_handler)

        # Get a lock on the logger
        with _LOGGER_LOCK:
            # Set the global logger
            _LOGGER = logger

    return _LOGGER

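# Hedged sketch of the flag values get_logger() relies on. The original flag
# definitions are not shown; the use of absl.flags here is an assumption (any
# FLAGS object exposing these attributes would work), and the defaults are
# illustrative only.
import logging

from absl import flags

flags.DEFINE_string('log_dir', '/tmp/logs', 'Directory for the rotating log files.')
flags.DEFINE_string('log_name', 'main.log', 'Log file name inside log_dir.')
flags.DEFINE_integer('log_size', 10 * 1024 * 1024, 'Maximum bytes per log file before rotation.')
flags.DEFINE_integer('num_logs', 5, 'Number of rotated backup files to keep.')
flags.DEFINE_integer('log_level', logging.INFO, 'Level for the stdout handler.')
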
def _cfg_factory_from_dir():
    if not args or not args.cfg_dir:
        return None

    from util import ensure_directory_exists
    cfg_dir = ensure_directory_exists(args.cfg_dir)

    from model import ConfigFactory
    factory = ConfigFactory.from_cfg_dir(cfg_dir=cfg_dir)
    return factory

def load_extracted_features():
    """Loads the extracted features from ../temp/deep/features if they were
    already extracted, and otherwise extracts & saves the features.

    Returns the training features, training labels, validation features, and
    validation labels.
    """
    dirname = "../temp/deep/features/"
    if util.ensure_directory_exists(dirname):
        logger.info("found extracted features in " + dirname + ", proceeding")
        loaded = np.load(dirname + "training.npz")
        tr_features = loaded['tr_features']
        tr_labels = loaded['tr_labels']
        val_features = loaded['val_features']
        val_labels = loaded['val_labels']
        return tr_features, tr_labels, val_features, val_labels

    logger.info("did not find extracted features in " + dirname)
    train_image_dir, test_image_dir = move_data_into_categorized_directories()

    # more data augmentation options:
    #   rotation_range
    #   width_shift_range
    #   height_shift_range
    #   shear_range
    #   zoom_range
    #   vertical_flip
    train_datagen = ImageDataGenerator(fill_mode='nearest',
                                       horizontal_flip=True,
                                       rotation_range=10.0,
                                       shear_range=5.0,
                                       zoom_range=0.2,
                                       rescale=1. / 255,
                                       validation_split=0.2)

    num_train = 4096
    logger.info("extracting {} training features".format(num_train))
    tr_features, tr_labels = extract_features(train_datagen, train_image_dir,
                                              "training", num_train)

    num_val = int(num_train * 0.2)
    logger.info("extracting {} validation features".format(num_val))
    val_features, val_labels = extract_features(train_datagen, train_image_dir,
                                                "validation", num_val)

    np.savez_compressed(dirname + "training.npz",
                        tr_features=tr_features,
                        tr_labels=tr_labels,
                        val_features=val_features,
                        val_labels=val_labels)
    return tr_features, tr_labels, val_features, val_labels

def save_resized_pictures(height, width):
    for cloud_kind, subkinds in util.cloud_kinds.items():
        if util.ensure_directory_exists("../temp/classic/" + cloud_kind):
            # directory exists already, we assume that the resized pictures
            # are in there
            continue
        logger.info("saving resized %s pictures" % cloud_kind)
        counter = 0
        for subkind in subkinds:
            for element in glob.glob("../data/" + subkind + "/*"):
                img = Image.open(element)
                # note: PIL's Image.resize expects the size as (width, height)
                resized = img.resize((height, width), PIL.Image.ANTIALIAS)
                path = "../temp/classic/%s/%s%s.jpg" % (cloud_kind, cloud_kind,
                                                        counter)
                resized.save(path)
                counter += 1

def from_cfg_dir(cfg_dir: str, cfg_types_file='config_types.yaml'):
    cfg_dir = ensure_directory_exists(os.path.abspath(cfg_dir))
    cfg_types_dict = parse_yaml_file(os.path.join(cfg_dir, cfg_types_file))
    raw = {}
    raw[ConfigFactory.CFG_TYPES] = cfg_types_dict

    def parse_cfg(cfg_type):
        # assume for now that there is exactly one cfg source (file)
        cfg_sources = list(cfg_type.sources())
        if not len(cfg_sources) == 1:
            raise ValueError('currently, only exactly one cfg file is supported per type')
        cfg_file = cfg_sources[0].file()
        parsed_cfg = parse_yaml_file(os.path.join(cfg_dir, cfg_file), as_snd=False)
        return parsed_cfg

    # parse all configurations
    for cfg_type in map(ConfigType, cfg_types_dict.values()):
        cfg_name = cfg_type.cfg_type_name()
        raw[cfg_name] = parse_cfg(cfg_type)

    return ConfigFactory(raw_dict=raw)
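
# Hedged usage sketch (assumptions marked): from_cfg_dir expects cfg_dir to
# contain the cfg_types_file ('config_types.yaml' by default) plus one YAML
# source file per declared configuration type. The directory path and cfg set
# name below are placeholders, not values taken from the source.
def _example_build_factory(cfg_dir='/path/to/cfg_dir'):
    factory = ConfigFactory.from_cfg_dir(cfg_dir=cfg_dir)
    return factory.cfg_set('example_cfg_set')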