def flow(parameters, *_):
    """Update the COVID19-ISRAEL checkout from origin/master and report its HEAD.

    Sets the git identity on the ``../COVID19-ISRAEL`` working copy, pulls
    ``origin master`` and reads the commit id that HEAD points at afterwards.

    :param parameters: mapping of step parameters; the optional
        ``dump_to_path`` entry overrides the output directory
        (default ``data/github_pull_covid19_israel``).
    :return: a ``Flow`` whose single input row is ``{'sha1': <HEAD commit id>}``.
    :raises Exception: if ``git pull`` exits with a non-zero status.
    """
    repo_dir = '../COVID19-ISRAEL'
    logging.info('Pulling latest code from COVID19-ISRAEL github repo')
    logging.info('COVID19_ISRAEL_REPOSITORY=%s'
                 % os.environ.get('COVID19_ISRAEL_REPOSITORY'))
    logging.info('pulling from origin/master')

    # The exit status of the identity setup is not inspected; only the pull
    # itself is treated as fatal.
    git_identity = (
        ('user.email', 'avid-covider-pipelines@localhost'),
        ('user.name', 'avid-covider-pipelines'),
    )
    for config_key, config_value in git_identity:
        utils.subprocess_call_log(
            ['git', 'config', config_key, config_value], cwd=repo_dir)

    pull_status = utils.subprocess_call_log(
        ['git', 'pull', 'origin', 'master'], cwd=repo_dir)
    if pull_status != 0:
        raise Exception('Failed to git pull')

    head_output = subprocess.check_output(
        ['git', 'rev-parse', 'HEAD'], cwd=repo_dir)
    sha1 = head_output.decode().strip()

    output_dir = parameters.get(
        'dump_to_path', 'data/github_pull_covid19_israel')
    return Flow(
        iter([{'sha1': sha1}]),
        update_resource(-1,
                        name='github_pull_covid19_israel',
                        path='github_pull_covid19_israel.csv',
                        **{'dpp:streaming': True}),
        printer(),
        dump_to_path(output_dir),
    )
def flow(parameters, *_):
    """Run a COVID19-ISRAEL module and build a flow describing updated files.

    Before the module runs, the modification time, size and hash of every
    regular file under ``../COVID19-ISRAEL`` are recorded. These snapshots are
    handed to ``get_updated_files`` (presumably to detect which files the
    module changed -- confirm against its definition).

    :param parameters: mapping with the required ``module`` entry and the
        optional ``log_file``, ``resource_name``, ``dump_to_path`` and
        ``printer_num_rows`` entries.
    :return: a ``Flow``; the printer step is included only when
        ``printer_num_rows`` is positive and the dump step only when the
        dump path is truthy.
    :raises Exception: if the module process exits with a non-zero status.
    """
    module_name = parameters['module']
    logging.info('Running COVID19-ISRAEL module %s' % module_name)

    # Snapshot the state of the checkout before the module touches it.
    mtimes, sizes, hashes = {}, {}, {}
    for path in glob('../COVID19-ISRAEL/**', recursive=True):
        if not os.path.isfile(path):
            continue
        mtimes[path] = os.path.getmtime(path)
        sizes[path] = os.path.getsize(path)
        hashes[path] = get_hash(path)

    exit_status = utils.subprocess_call_log(
        ['python', '-u', '-m', module_name],
        log_file=parameters.get('log_file'),
        cwd='../COVID19-ISRAEL')
    if exit_status != 0:
        raise Exception(
            'Failed to run COVID19-ISRAEL module %s' % module_name)

    resource_name = parameters.get(
        'resource_name', 'covid19_israel_updated_files')
    dump_to_path_name = parameters.get(
        'dump_to_path',
        'data/run_covid19_israel/last_updated_files/%s' % module_name)
    printer_num_rows = parameters.get('printer_num_rows', 999)

    steps = [
        get_updated_files(mtimes, sizes, hashes),
        update_resource(-1,
                        name=resource_name,
                        path='%s.csv' % resource_name,
                        **{'dpp:streaming': True}),
    ]
    if printer_num_rows > 0:
        steps.append(printer(num_rows=printer_num_rows))
    if dump_to_path_name:
        steps.append(dump_to_path(dump_to_path_name))
    return Flow(*steps)
def run_covid19_israel(parameters, run_row):
    """Run one COVID19-ISRAEL module and record the outcome on ``run_row``.

    The module is executed as ``python -u -m <module> <args...>`` inside
    ``../COVID19-ISRAEL`` with its output logged to
    ``<output-dir>/log_files/<start_time>.log``. After a successful run the
    configured external sharing packages (if any) are published.

    :param parameters: mapping with the required ``module`` and ``output-dir``
        entries and the optional ``args`` and ``external_sharing_packages``
        entries.
    :param run_row: mutable mapping describing this run; ``start_time`` is
        read (it must support ``strftime``), ``github_sha1`` and ``error``
        (``'yes'`` / ``'no'``) are written.
    :return: None; failures are reported through ``run_row['error']`` and the
        log, not raised.
    """
    run_row['github_sha1'] = globals().get('COVID19_ISRAEL_GITHUB_SHA1', '_')
    args = parameters.get('args') or []
    cmd = ['python', '-u', '-m', parameters['module'], *args]
    # Debugging aids -- uncomment to echo the command or to force a failure:
    # cmd = ['echo'] + cmd
    # cmd = ['bash', '-c', 'echo %s && false' % cmd]

    log_files_dir = os.path.join(parameters['output-dir'], 'log_files')
    os.makedirs(log_files_dir, exist_ok=True)
    log_filename = os.path.join(
        log_files_dir,
        '%s.log' % run_row['start_time'].strftime('%Y%m%dT%H%M%S'))

    if utils.subprocess_call_log(
            cmd, log_file=log_filename, cwd='../COVID19-ISRAEL') != 0:
        run_row['error'] = 'yes'
        logging.error('Failed to run COVID19-ISRAEL module %s with args %s'
                      % (parameters['module'], args))
        return

    run_row['error'] = 'no'
    # NOTE(review): publishing is kept under the success branch; the flattened
    # original does not make the nesting explicit -- confirm.
    external_sharing_packages = parameters.get("external_sharing_packages")
    if not external_sharing_packages:
        return
    try:
        publish_external_sharing_packages.flow({
            "packages": external_sharing_packages
        }).process()
    except Exception:
        errmsg = "Failed to export external sharing packages for module %s with args %s" % (
            parameters["module"], args)
        with open(log_filename, "a") as f:
            # Terminate the message line so the traceback header does not get
            # glued onto the end of it in the log file.
            f.write(errmsg + "\n")
            traceback.print_exc(file=f)
        logging.exception(errmsg)
        run_row['error'] = 'yes'
def _process_packages():
    """Publish each configured package's resources to its target GitHub repos.

    Generator that reads ``parameters`` from the enclosing scope. For every
    entry of ``parameters["packages"]`` the package descriptor JSON is loaded
    from ``../COVID19-ISRAEL/<package_path>``. For every publish target the
    target repository is shallow-cloned over SSH with the target's deploy key
    into a temporary directory, resources whose hash differs from the file
    already in the repository are copied in and staged, and a commit is
    pushed when at least one file was staged.

    :yield: one dict per package with the descriptor's ``name``, ``datetime``
        and ``hash``.
    :raises Exception: if a publish target lacks required keys or a git
        command exits with a non-zero status.
    :raises KeyError: if the ``DEPLOY_KEY_FILE_<deploy_key>`` environment
        variable or a referenced resource name is missing.
    """

    def _run_git(git_args, **kwargs):
        # Checked explicitly instead of via ``assert``: assert statements are
        # removed under ``python -O``, which would silently skip the git
        # command itself, not just the check.
        if subprocess_call_log(["git", *git_args], **kwargs) != 0:
            raise Exception("Failed to run: git %s" % " ".join(git_args))

    for package in parameters.get("packages", []):
        descriptor_path = os.path.join(
            "..", "COVID19-ISRAEL", package["package_path"])
        with open(descriptor_path) as f:
            package_descriptor = json.load(f)
        resources = {
            resource["name"]: resource
            for resource in package_descriptor["resources"]
        }
        for publish_target in package["publish_targets"]:
            has_files = ("files" in publish_target
                         or "files_foreach" in publish_target)
            if not ("github_repo" in publish_target
                    and "deploy_key" in publish_target
                    and has_files):
                raise Exception(
                    "Invalid publish target, it must define github_repo, "
                    "deploy_key and files or files_foreach: %s"
                    % publish_target.get("github_repo"))
            with tempfile.TemporaryDirectory() as tmpdir:
                # Work on a private copy of the key with owner-read-only
                # permissions.
                source_deploy_key_file = os.environ[
                    "DEPLOY_KEY_FILE_" + publish_target["deploy_key"]]
                deploy_key_file = os.path.join(tmpdir, "deploy_key")
                shutil.copyfile(source_deploy_key_file, deploy_key_file)
                os.chmod(deploy_key_file, 0o400)
                gitenv = {
                    **os.environ,
                    "GIT_SSH_COMMAND": "ssh -i %s -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentitiesOnly=yes" % deploy_key_file
                }
                branch = publish_target.get("branch", "master")
                repodir = os.path.join(tmpdir, "repo")
                # GitHub SSH remote form: git@github.com:<owner>/<repo>.git
                _run_git([
                    "clone", "--depth", "1", "--branch", branch,
                    "git@github.com:%s.git" % publish_target["github_repo"],
                    repodir
                ], env=gitenv)
                _run_git(["config", "user.name", "avid-covider-pipelines"],
                         cwd=repodir)
                _run_git(["config", "user.email",
                          "avid-covider-pipelines@localhost"],
                         cwd=repodir)

                # Map resource name -> target path template: the explicit
                # "files" entries plus one entry per value of each
                # descriptor list named in "files_foreach".
                files = {**publish_target.get("files", {})}
                for metadata_list_key, files_foreach in publish_target.get(
                        "files_foreach", {}).items():
                    for resource_name_template, target_path_template in files_foreach.items():
                        for foreach_value in package_descriptor.get(
                                metadata_list_key, []):
                            resource_name = resource_name_template.format(
                                foreach_value=foreach_value,
                                **package_descriptor)
                            target_path = target_path_template.format(
                                foreach_value=foreach_value,
                                **package_descriptor)
                            files[resource_name] = target_path

                num_added = 0
                for resource_name, target_path_template in files.items():
                    target_path = target_path_template.format(
                        **package_descriptor)
                    target_fullpath = os.path.join(repodir, target_path)
                    resource = resources[resource_name]
                    if (os.path.exists(target_fullpath)
                            and get_hash(target_fullpath) == resource["hash"]):
                        logging.info(
                            "File is not changed: %s" % resource["path"])
                        continue
                    source_path = os.path.join(
                        "..", "COVID19-ISRAEL", resource["path"])
                    logging.info(
                        "%s: %s --> %s"
                        % (resource_name, source_path, target_fullpath))
                    # The target may live in a directory the repository does
                    # not contain yet.
                    os.makedirs(os.path.dirname(target_fullpath),
                                exist_ok=True)
                    shutil.copyfile(source_path, target_fullpath)
                    _run_git(["add", target_path], cwd=repodir)
                    num_added += 1

                if num_added > 0:
                    logging.info("Committing %s changes" % num_added)
                    _run_git([
                        "commit", "-m",
                        "automated update from hasadna/avid-covider-pipelines"
                    ], cwd=repodir)
                    _run_git(["push", "origin", branch],
                             cwd=repodir, env=gitenv)
                else:
                    logging.info("No changes to commit")
        yield {
            "name": package_descriptor["name"],
            "datetime": package_descriptor["datetime"],
            "hash": package_descriptor["hash"]
        }
def flow(parameters, *_):
    """Update the COVID19-ISRAEL checkout and report the commit id in use.

    When the ``COVID19_ISRAEL_REPOSITORY`` environment variable is empty no
    git command is run and the sha1 is taken from ``COVID19_ISRAEL_SHA1``
    (default ``"_"``). Otherwise the git identity is set on
    ``../COVID19-ISRAEL``, the branch named by ``COVID19_ISRAEL_BRANCH`` is
    fetched, checked out and pulled (``master`` is pulled when the variable
    is empty), and the sha1 is read from HEAD.

    :param parameters: mapping of step parameters. A truthy
        ``change-run-covid`` entry makes the first line of
        ``avid_covider_pipelines/run_covid19_israel.py`` define
        ``COVID19_ISRAEL_GITHUB_SHA1`` as the sha1; the optional
        ``dump_to_path`` entry overrides the output directory.
    :return: a ``Flow`` whose single input row is ``{'sha1': <sha1>}``.
    :raises Exception: if git fetch, checkout or pull exits non-zero.
    """
    repo_dir = '../COVID19-ISRAEL'
    repository = os.environ.get('COVID19_ISRAEL_REPOSITORY')
    branch = os.environ.get('COVID19_ISRAEL_BRANCH')
    logging.info('Pulling latest code from COVID19-ISRAEL github repo')
    logging.info('COVID19_ISRAEL_REPOSITORY=%s' % repository)
    logging.info('COVID19_ISRAEL_BRANCH=%s' % branch)

    if not repository:
        sha1 = os.environ.get('COVID19_ISRAEL_SHA1', "_")
        logging.info(
            'skipping pull because COVID19_ISRAEL_REPOSITORY env var is empty')
        logging.info('using env var COVID19_ISRAEL_SHA1 for the sha1')
        logging.info('COVID19_ISRAEL_SHA1=' + sha1)
    else:
        # The exit status of the identity setup is not inspected; only
        # fetch / checkout / pull failures are fatal.
        utils.subprocess_call_log(
            ['git', 'config', 'user.email', 'avid-covider-pipelines@localhost'],
            cwd=repo_dir)
        utils.subprocess_call_log(
            ['git', 'config', 'user.name', 'avid-covider-pipelines'],
            cwd=repo_dir)
        if branch:
            logging.info('Pulling from origin/' + branch)
            _git_or_raise(['fetch', 'origin'], repo_dir,
                          'Failed to fetch origin')
            _git_or_raise(['checkout', branch], repo_dir,
                          'Failed to switch branch')
            pull_branch = branch
        else:
            logging.info('pulling from origin/master')
            pull_branch = 'master'
        _git_or_raise(['pull', 'origin', pull_branch], repo_dir,
                      'Failed to git pull')
        sha1 = subprocess.check_output(
            ['git', 'rev-parse', 'HEAD'], cwd=repo_dir).decode().strip()
        # Debugging aid:
        # sha1 = subprocess.check_output(['cat', '/pipelines/data/fake-sha1'], cwd='../COVID19-ISRAEL').decode().strip()

    if parameters.get('change-run-covid'):
        _write_sha1_constant(
            'avid_covider_pipelines/run_covid19_israel.py', sha1)

    return Flow(
        iter([{'sha1': sha1}]),
        update_resource(-1,
                        name='github_pull_covid19_israel',
                        path='github_pull_covid19_israel.csv',
                        **{'dpp:streaming': True}),
        printer(),
        dump_to_path(
            parameters.get('dump_to_path', 'data/github_pull_covid19_israel')))


def _git_or_raise(git_args, cwd, error_message):
    """Run ``git <git_args>`` in *cwd*; raise ``Exception(error_message)`` on non-zero exit."""
    if utils.subprocess_call_log(['git', *git_args], cwd=cwd) != 0:
        raise Exception(error_message)


def _write_sha1_constant(path, sha1):
    """Make the first line of *path* define ``COVID19_ISRAEL_GITHUB_SHA1`` as *sha1*.

    An existing definition on the first line is replaced; otherwise the
    definition is inserted before the current content. This also covers an
    empty file, which would otherwise be left without the constant.
    """
    sha1_line = 'COVID19_ISRAEL_GITHUB_SHA1 = "%s"\n' % sha1
    with open(path, 'r') as f:
        lines = f.readlines()
    if lines and lines[0].startswith('COVID19_ISRAEL_GITHUB_SHA1 = '):
        lines[0] = sha1_line
    else:
        lines.insert(0, sha1_line)
    with open(path, 'w') as f:
        f.writelines(lines)