def handler(event, context):
    for record in event['Records']:
        message = json.loads(record["body"])
        chat_id = str(message['chat_id'])
        prefix = str(message['prefix'])
        tg_client = TgClient(chat_id)
        s3 = boto3.client('s3')

        # Do not process items with the same prefix if already done -
        # SQS could deliver duplicates
        output_exists = S3Helper.is_file_exist(s3, BUCKET, '{}_output.jpg'.format(prefix))
        if not output_exists:
            try:
                tg_client.send_message('NST - Started processing...')

                # Request the EC2 instance to process the
                # NST content and style images
                url = 'http://{}/api/?img_prefix={}'.format(EC2_NST_SERVER_IP, prefix)
                response = requests.get(url)
                response_json = json.loads(response.text)
                file_name = response_json['file_name']
                file_url = 'https://{}.s3.amazonaws.com/{}'.format(BUCKET, file_name)

                # Send the output image back
                tg_client.send_photo(file_url)
            except Exception as e:
                tg_client.send_message('NST - Oops, something went wrong - {}'.format(e))

def copy_files(cube):
    s3_helper = S3Helper()

    # Look in the output directory
    directory_to_save = join(CHILES_IMGCONCAT_OUTPUT, cube) + '.cube'
    if isdir(directory_to_save):
        LOGGER.info('dir_name: {0}'.format(directory_to_save))
        output_tar_filename = directory_to_save + '.tar'

        if can_be_multipart_tar(directory_to_save):
            LOGGER.info('Using add_tar_to_bucket_multipart')
            s3_helper.add_tar_to_bucket_multipart(
                CHILES_BUCKET_NAME,
                'IMGCONCAT/{0}'.format(basename(output_tar_filename)),
                directory_to_save,
                bufsize=20 * 1024 * 1024)
        else:
            LOGGER.info('Using make_tarfile, then adding file to bucket')
            make_tarfile(output_tar_filename, directory_to_save)
            s3_helper.add_file_to_bucket(
                CHILES_BUCKET_NAME,
                'IMGCONCAT/{0}'.format(basename(output_tar_filename)),
                output_tar_filename)

            # Clean up
            os.remove(output_tar_filename)

def __init__(self):
    """Init method for the Report helper class."""
    self.s3 = S3Helper()
    self.pg = Postgres()
    self.conn = self.pg.conn
    self.cursor = self.pg.cursor
    self.unknown_deps_helper = UnknownDepsReportHelper()
    self.sentry_helper = SentryReportHelper()
    self.npm_model_bucket = os.getenv('NPM_MODEL_BUCKET', 'cvae-insights')
    self.maven_model_bucket = os.getenv('MAVEN_MODEL_BUCKET', 'hpf-insights')
    self.pypi_model_bucket = os.getenv('PYPI_MODEL_BUCKET', 'hpf-insights')
    self.golang_model_bucket = os.getenv('GOLANG_MODEL_BUCKET', 'golang-insights')
    self.maven_training_repo = os.getenv(
        'MAVEN_TRAINING_REPO',
        'https://github.com/fabric8-analytics/f8a-hpf-insights')
    self.npm_training_repo = os.getenv(
        'NPM_TRAINING_REPO',
        'https://github.com/fabric8-analytics/fabric8-analytics-npm-insights')
    self.golang_training_repo = os.getenv(
        'GOLANG_TRAINING_REPO',
        'https://github.com/fabric8-analytics/f8a-golang-insights')
    self.pypi_training_repo = os.getenv(
        'PYPI_TRAINING_REPO',
        'https://github.com/fabric8-analytics/f8a-pypi-insights')
    self.emr_api = os.getenv('EMR_API', 'http://f8a-emr-deployment:6006')

def main():
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)

    # Get the data we need
    tar, tar_gz = get_cvel(bucket)
    delete_duplicates(tar, tar_gz, bucket)

def __init__(self):
    """Init method for the Report helper class."""
    self.s3 = S3Helper()
    self.curr_dir = os.path.join('/tmp', "dynamic_manifests")
    if not os.path.exists(self.curr_dir):
        os.makedirs(self.curr_dir)

def copy_files(frequency_id):
    s3_helper = S3Helper()

    # Look in the output directory
    LOGGER.info('directory_data: {0}'.format(CHILES_CLEAN_OUTPUT))
    for dir_name in os.listdir(CHILES_CLEAN_OUTPUT):
        LOGGER.info('dir_name: {0}'.format(dir_name))
        result_dir = join(CHILES_CLEAN_OUTPUT, dir_name)
        if isdir(result_dir) and dir_name.startswith('cube_') and dir_name.endswith('.image'):
            LOGGER.info('dir_name: {0}'.format(dir_name))
            output_tar_filename = join(CHILES_CLEAN_OUTPUT, dir_name + '.tar')

            if can_be_multipart_tar(result_dir):
                LOGGER.info('Using add_tar_to_bucket_multipart')
                s3_helper.add_tar_to_bucket_multipart(
                    CHILES_BUCKET_NAME,
                    '/CLEAN/{0}/{1}'.format(frequency_id, basename(output_tar_filename)),
                    result_dir)
            else:
                LOGGER.info('Using make_tarfile, then adding file to bucket')
                make_tarfile(output_tar_filename, result_dir)
                s3_helper.add_file_to_bucket(
                    CHILES_BUCKET_NAME,
                    'CVEL/{0}/{1}/data.tar'.format(frequency_id, basename(output_tar_filename)),
                    output_tar_filename)

                # Clean up
                os.remove(output_tar_filename)

def copy_files(date, vis_file):
    s3_helper = S3Helper()

    # Look in the output directory
    for root, dir_names, filenames in os.walk(CHILES_CVEL_OUTPUT):
        LOGGER.info('root: {0}, dir_names: {1}, filenames: {2}'.format(root, dir_names, filenames))
        for match in fnmatch.filter(dir_names, vis_file):
            result_dir = join(root, match)
            LOGGER.info('Working on: {0}'.format(result_dir))

            if can_be_multipart_tar(result_dir):
                LOGGER.info('Using add_tar_to_bucket_multipart')
                s3_helper.add_tar_to_bucket_multipart(
                    CHILES_BUCKET_NAME,
                    'CVEL/{0}/{1}/data.tar'.format(vis_file, date),
                    result_dir)
            else:
                LOGGER.info('Using make_tarfile, then adding file to bucket')
                output_tar_filename = join(root, match + '.tar')
                make_tarfile(output_tar_filename, result_dir)
                s3_helper.add_file_to_bucket(
                    CHILES_BUCKET_NAME,
                    'CVEL/{0}/{1}/data.tar'.format(vis_file, date),
                    output_tar_filename)

                # Clean up
                os.remove(output_tar_filename)

            shutil.rmtree(result_dir, ignore_errors=True)

def copy_files(frequency_id, processes, days):
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    LOGGER.info('Scanning bucket: {0}, frequency_id: {1}'.format(bucket, frequency_id))

    # Create the queue
    queue = multiprocessing.JoinableQueue()

    # Start the consumers
    for x in range(processes):
        consumer = Consumer(queue)
        consumer.start()

    for key in bucket.list(prefix='CVEL/{0}'.format(frequency_id)):
        LOGGER.info('Checking {0}'.format(key.key))
        # Ignore the key
        if key.key.endswith('/data.tar.gz') or key.key.endswith('/data.tar'):
            elements = key.key.split('/')
            if elements[2] in days:
                directory = '/mnt/output/Chiles/split_vis/{0}/'.format(elements[2])

                # Queue the copy of the file
                temp_file = os.path.join(
                    directory,
                    'data.tar.gz' if key.key.endswith('/data.tar.gz') else 'data.tar')
                queue.put(Task(key, temp_file, directory, frequency_id))

    # Add a poison pill to shut things down
    for x in range(processes):
        queue.put(None)

    # Wait for the queue to terminate
    queue.join()

def get_build_results_if_exists(s3_helper: S3Helper, s3_prefix: str) -> Optional[List[str]]:
    try:
        content = s3_helper.list_prefix(s3_prefix)
        return content
    except Exception as ex:
        logging.info("Got exception %s listing %s", ex, s3_prefix)
        return None

def main():
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)

    for key in bucket.list(prefix='CLEAN/'):
        if not key.key.endswith('image.tar.gz') and not key.key.endswith('image.tar'):
            LOG.info('Removing {0}'.format(key.key))
            key.delete()

def __call__(self):
    # noinspection PyBroadException
    try:
        LOGGER.info('Copying {0} to s3:{1}'.format(self._filename, self._bucket_location))
        s3_helper = S3Helper()
        s3_helper.add_file_to_bucket(CHILES_BUCKET_NAME, self._bucket_location, self._filename)
    except Exception:
        LOGGER.exception('CopyTask died')

def get_clean():
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    clean_data = set()

    for key in bucket.list(prefix='CLEAN/'):
        if key.key.endswith('image.tar.gz') or key.key.endswith('image.tar'):
            LOG.info('Checking {0}'.format(key.key))
            elements = key.key.split('/')
            clean_data.add(elements[1])

    return clean_data

def __init__(self):
    """Init method for SentryReportHelper."""
    self.s3 = S3Helper()
    self.sentry_url = os.getenv('SENTRY_URL', 'https://sentry.devshift.net')
    self.sentry_api_issues = self.sentry_url + os.getenv(
        'SENTRY_API_ISSUES',
        '/api/0/projects/sentry/fabric8-analytics-production/issues/')
    self.sentry_api_tags = self.sentry_url + os.getenv('SENTRY_API_TAGS', '/api/0/issues/')
    self.sentry_token = os.getenv('SENTRY_AUTH_TOKEN', '')

def get_cvel():
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    cvel_data = []

    for key in bucket.list(prefix='CVEL/'):
        LOG.info('Checking {0}'.format(key.key))
        if key.key.endswith('data.tar.gz') or key.key.endswith('data.tar'):
            elements = key.key.split('/')
            cvel_data.append([str(elements[2]), str(elements[1])])

    return cvel_data

def __init__(self):
    """Init method for SentryReportHelper."""
    self.s3 = S3Helper()
    self.sentry_api_issues = os.getenv(
        'SENTRY_API_ISSUES',
        'https://errortracking.prod-preview.openshift.io'
        '/api/0/projects/openshift_io/fabric8-analytics-production/issues/')
    self.sentry_api_tags = os.getenv(
        'SENTRY_API_TAGS',
        'https://errortracking.prod-preview.openshift.io/api/0/issues/')
    self.sentry_token = os.getenv('SENTRY_AUTH_TOKEN', '')

def process_clean():
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    clean_data = {}

    for key in bucket.list(prefix='CLEAN-log/'):
        if key.key.endswith('chiles-output.log'):
            elements = key.key.split('/')
            element_1 = elements[1]
            if element_1 == 'standalone':
                # Ignore it
                pass
            elif element_1 not in clean_data.keys():
                clean_data[element_1] = elements
            else:
                old_elements = clean_data[element_1]
                if elements[2] > old_elements[2]:
                    clean_data[element_1] = elements

    total_time = timedelta()
    for key, value in clean_data.iteritems():
        new_dir = os.path.join('/tmp', value[0], value[1], value[2])
        if not os.path.exists(new_dir):
            os.makedirs(new_dir)

        file_name = os.path.join(new_dir, value[3])
        if not os.path.exists(file_name):
            key_name = os.path.join(value[0], value[1], value[2], value[3])
            s3_helper.get_file_from_bucket(CHILES_BUCKET_NAME, key_name, file_name)

        with open(file_name, 'rb') as file_handle:
            first = next(file_handle)
            offset = -100
            while True:
                file_handle.seek(offset, 2)
                lines = file_handle.readlines()
                if len(lines) > 1:
                    last = lines[-1]
                    break
                offset *= 2

        first_line = first.split()
        last_line = last.split(':')
        start_time = time.strptime(
            '{0} {1} {2}'.format(first_line[0], first_line[1], first_line[2]),
            '%b %d %H:%M:%S')
        end_time = time.strptime(
            '{0}:{1}:{2}'.format(last_line[1], last_line[2], last_line[3].split(',')[0]),
            '%Y-%m-%d %H:%M:%S')
        start_time = datetime(
            end_time.tm_year, start_time.tm_mon, start_time.tm_mday,
            start_time.tm_hour, start_time.tm_min, start_time.tm_sec)
        end_time = datetime(
            end_time.tm_year, end_time.tm_mon, end_time.tm_mday,
            end_time.tm_hour, end_time.tm_min, end_time.tm_sec)
        total_time += end_time - start_time

    return total_time

def check_for_success_run(
    s3_helper: S3Helper,
    s3_prefix: str,
    build_name: str,
    build_config: BuildConfig,
):
    logged_prefix = os.path.join(S3_BUILDS_BUCKET, s3_prefix)
    logging.info("Checking for artifacts in %s", logged_prefix)
    try:
        # TODO: theoretically, it would miss performance artifact for pr==0,
        # but luckily we rerun only really failed tasks now, so we're safe
        build_results = s3_helper.list_prefix(s3_prefix)
    except Exception as ex:
        logging.info("Got exception while listing %s: %s\nRerun", logged_prefix, ex)
        return

    if build_results is None or len(build_results) == 0:
        logging.info("Nothing found in %s, rerun", logged_prefix)
        return

    logging.info("Some build results found:\n%s", build_results)
    build_urls = []
    log_url = ""
    for url in build_results:
        url_escaped = url.replace("+", "%2B").replace(" ", "%20")
        if BUILD_LOG_NAME in url:
            log_url = f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{url_escaped}"
        else:
            build_urls.append(f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{url_escaped}")
    if not log_url:
        # log is uploaded the last, so if there's no log we need to rerun the build
        return

    success = len(build_urls) > 0
    create_json_artifact(
        TEMP_PATH,
        build_name,
        log_url,
        build_urls,
        build_config,
        0,
        success,
    )
    # Fail the build job if it did not succeed
    if not success:
        sys.exit(1)
    else:
        sys.exit(0)

def main():
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    cvel_data = set()

    for key in bucket.list(prefix='CVEL/'):
        LOG.info('Checking {0}'.format(key.key))
        elements = key.key.split('/')
        cvel_data.add(elements[1])

    output = '\n'
    for key in sorted(cvel_data):
        output += '{0} '.format(key)
    output += '\n'
    LOG.info(output)

def get_cvel():
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    cvel_data = {}

    for key in bucket.list(prefix='CVEL/'):
        LOGGER.info('Checking {0}'.format(key.key))
        if key.key.endswith('data.tar.gz') or key.key.endswith('data.tar'):
            elements = key.key.split('/')
            data_list = cvel_data.get(str(elements[1]))
            if data_list is None:
                data_list = []
                cvel_data[str(elements[1])] = data_list
            data_list.append(str(elements[2]))

    return cvel_data

def __call__(self):
    # noinspection PyBroadException
    try:
        s3_helper = S3Helper(self._aws_access_key_id, self._aws_secret_access_key)
        LOGGER.info('Copying to: {0}/{1}/measurement_set.tar'.format(
            self._bucket, self._bucket_location))

        # We can have 10,000 parts
        # The biggest file from Semester 1 is 803GB
        # So 100 MB
        s3_helper.add_tar_to_bucket_multipart(
            self._bucket,
            '{0}/measurement_set.tar'.format(self._bucket_location),
            self._filename,
            parallel_processes=2,
            bufsize=100 * 1024 * 1024
        )
    except Exception:
        LOGGER.exception('CopyTask died')

def process_test_results(
    s3_client: S3Helper,
    test_results: List[Tuple[str, str, str]],
    s3_path_prefix: str,
) -> Tuple[str, List[Tuple[str, str]]]:
    overall_status = "success"
    processed_test_results = []
    for image, build_log, status in test_results:
        if status != "OK":
            overall_status = "failure"
        url_part = ""
        if build_log is not None and os.path.exists(build_log):
            build_url = s3_client.upload_test_report_to_s3(
                build_log, s3_path_prefix + "/" + os.path.basename(build_log))
            url_part += f'<a href="{build_url}">build_log</a>'
        if url_part:
            test_name = image + " (" + url_part + ")"
        else:
            test_name = image
        processed_test_results.append((test_name, status))
    return overall_status, processed_test_results

def get_list_data():
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    LOG.info('Scanning bucket: {0}'.format(bucket))
    file_list = {}

    for key in bucket.list(prefix='CVEL/'):
        # Ignore the key
        if key.key.endswith('/data.tar.gz') or key.key.endswith('/data.tar'):
            (head, tail) = os.path.split(key.key)
            element = file_list.get(head)
            if element is None:
                file_list[head] = KeyData(key, tail)
            elif tail.endswith('data.tar'):
                file_list[head] = KeyData(key, tail)

    keys = file_list.keys()
    keys = sorted(keys)
    return keys, file_list

def save_worker_result_to_s3(frequency, report_name, content) -> bool:
    """Save worker result in S3 bucket.

    :param frequency: Frequency of reporting (daily/monthly)
    :param report_name: Name of the file/report
    :param content: File content to be saved in S3
    :return: True if the save succeeded, False otherwise
    """
    logger.info("Trying to save report file")
    try:
        s3 = S3Helper()
        obj_key = f'v2/{frequency}/{report_name}.json'
        s3.store_json_content(content=content, obj_key=obj_key,
                              bucket_name=s3.report_bucket_name)
        logger.info(f"Successfully saved report in {obj_key}.")
        return True
    except Exception as e:
        logger.exception(f'Unable to store the report on S3. Reason: {e}')
        return False

def upload_master_static_binaries(
    pr_info: PRInfo,
    build_config: BuildConfig,
    s3_helper: S3Helper,
    build_output_path: str,
):
    """Upload binary artifacts to static S3 links."""
    static_binary_name = build_config.get("static_binary_name", False)
    if pr_info.number != 0:
        return
    elif not static_binary_name:
        return
    elif pr_info.base_ref != "master":
        return

    s3_path = "/".join((pr_info.base_ref, static_binary_name, "clickhouse"))
    binary = os.path.join(build_output_path, "clickhouse")
    url = s3_helper.upload_build_file_to_s3(binary, s3_path)
    print(f"::notice ::Binary static URL: {url}")

def copy_files(directory_name):
    # Create the directory
    if not exists(DIRECTORY):
        os.makedirs(DIRECTORY)

    # Scan the bucket
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    LOG.info('Scanning bucket: {0}/CLEAN/{1}'.format(bucket, directory_name))

    for key in bucket.list(prefix='CLEAN/{0}'.format(directory_name)):
        LOG.info('Checking {0}'.format(key.key))
        # Ignore the key
        if key.key.endswith('.image.tar.gz') or key.key.endswith('.image.tar'):
            # Do we need this file?
            basename_key = basename(key.key)
            tar_file = os.path.join(DIRECTORY, basename_key)
            if tar_file.endswith('.tar.gz'):
                image_name = basename(tar_file).replace('.tar.gz', '')
            else:
                image_name = basename(tar_file).replace('.tar', '')
            directory = join(DIRECTORY, image_name)

            # noinspection PyBroadException
            try:
                LOG.info('key: {0}, tar_file: {1}, directory: {2}'.format(
                    key.key, tar_file, directory))
                if os.path.exists(directory):
                    LOG.info('directory already exists: {0}'.format(directory))
                else:
                    os.makedirs(directory)
                    key.get_contents_to_filename(tar_file)
                    with closing(
                            tarfile.open(
                                tar_file,
                                "r:gz" if tar_file.endswith('.tar.gz') else "r:")) as tar:
                        tar.extractall(path=directory)
                    os.remove(tar_file)
            except Exception:
                LOG.exception('Task died')
                shutil.rmtree(directory, ignore_errors=True)

def __init__(self, bucket, aws_profile=None, logger=None):
    """
    Initialization function of the ITSSandbox class.

    Parameters:
        bucket: Name of the AWS S3 bucket that contains the ITS Sandbox.
        aws_profile: Optional string name of your AWS profile, as set up in
            the credential file at ~/.aws/credentials. There is no need to pass
            in this parameter if you will be using your default profile. For
            additional information on how to set up the credential file, see
            https://docs.aws.amazon.com/sdk-for-php/v3/developer-guide/guide_credentials_profiles.html
        logger: Optional logger object. If a logger object is passed in,
            information will be logged instead of printed; otherwise,
            information will be printed.
    """
    self.bucket = bucket
    self.s3helper = S3Helper(aws_profile=aws_profile)
    self.print_func = print
    if logger:
        self.print_func = logger.info

def copy_files(processes, bottom_frequency, frequency_range):
    # Create the directory
    if not exists(DIRECTORY):
        os.makedirs(DIRECTORY)

    # Scan the bucket
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    LOGGER.info('Scanning bucket: {0}/CLEAN'.format(bucket))

    # Create the queue
    queue = multiprocessing.JoinableQueue()

    # Start the consumers
    for x in range(processes):
        consumer = Consumer(queue)
        consumer.start()

    for key in bucket.list(prefix='CLEAN/'):
        LOGGER.info('Checking {0}'.format(key.key))
        # Ignore the key
        if key.key.endswith('.image.tar.gz') or key.key.endswith('.image.tar'):
            # Do we need this file?
            basename_key = basename(key.key)
            if in_frequency_range(basename_key, bottom_frequency, frequency_range):
                # Queue the copy of the file
                temp_file = os.path.join(DIRECTORY, basename_key)
                queue.put(Task(key, temp_file, DIRECTORY))

    # Add a poison pill to shut things down
    for x in range(processes):
        queue.put(None)

    # Wait for the queue to terminate
    queue.join()

logging.info("Going to run command %s", run_command) with subprocess.Popen(run_command, shell=True) as process: retcode = process.wait() if retcode == 0: logging.info("Run successfully") else: logging.info("Run failed") subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) print("Result path", os.listdir(result_path)) print("Server log path", os.listdir(server_log_path)) state, description, test_results, additional_logs = process_result( result_path, server_log_path) ch_helper = ClickHouseHelper() s3_helper = S3Helper('https://s3.amazonaws.com') report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs, CHECK_NAME) print(f"::notice ::Report url: {report_url}") post_commit_status(gh, pr_info.sha, CHECK_NAME, description, state, report_url) prepared_events = prepare_tests_results_for_clickhouse( pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, CHECK_NAME) ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
def main():
    logging.basicConfig(level=logging.INFO)
    stopwatch = Stopwatch()

    args = parse_args()
    if args.suffix:
        global NAME
        NAME += f" {args.suffix}"
        changed_json = os.path.join(TEMP_PATH, f"changed_images_{args.suffix}.json")
    else:
        changed_json = os.path.join(TEMP_PATH, "changed_images.json")

    push = not args.no_push_images
    if push:
        subprocess.check_output(  # pylint: disable=unexpected-keyword-arg
            "docker login --username 'robotclickhouse' --password-stdin",
            input=get_parameter_from_ssm("dockerhub_robot_password"),
            encoding="utf-8",
            shell=True,
        )

    if os.path.exists(TEMP_PATH):
        shutil.rmtree(TEMP_PATH)
    os.makedirs(TEMP_PATH)

    if args.image_path:
        pr_info = PRInfo()
        pr_info.changed_files = set(i for i in args.image_path)
    else:
        pr_info = PRInfo(need_changed_files=True)

    changed_images = get_changed_docker_images(pr_info, GITHUB_WORKSPACE, "docker/images.json")
    logging.info("Has changed images %s", ", ".join([im.path for im in changed_images]))

    image_versions, result_version = gen_versions(pr_info, args.suffix)

    result_images = {}
    images_processing_result = []
    for image in changed_images:
        images_processing_result += process_image_with_parents(image, image_versions, push)
        result_images[image.repo] = result_version

    if changed_images:
        description = "Updated " + ",".join([im.repo for im in changed_images])
    else:
        description = "Nothing to update"

    if len(description) >= 140:
        description = description[:136] + "..."

    with open(changed_json, "w", encoding="utf-8") as images_file:
        json.dump(result_images, images_file)

    s3_helper = S3Helper("https://s3.amazonaws.com")

    s3_path_prefix = (str(pr_info.number) + "/" + pr_info.sha + "/" +
                      NAME.lower().replace(" ", "_"))
    status, test_results = process_test_results(s3_helper, images_processing_result,
                                                s3_path_prefix)

    url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME)

    print(f"::notice ::Report url: {url}")
    print(f'::set-output name=url_output::"{url}"')

    if args.no_reports:
        return

    gh = Github(get_best_robot_token())
    post_commit_status(gh, pr_info.sha, NAME, description, status, url)

    prepared_events = prepare_tests_results_for_clickhouse(
        pr_info,
        test_results,
        status,
        stopwatch.duration_seconds,
        stopwatch.start_time_str,
        url,
        NAME,
    )
    ch_helper = ClickHouseHelper()
    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)

status = "failure" else: for f in files: path = os.path.join(test_output, f) additional_files.append(path) with open(path, "r", encoding="utf-8") as check_file: for line in check_file: if "ERROR" in line: lines.append((line.split(":")[-1], "FAIL")) if lines: status = "failure" description = "Found errors in docs" elif status != "failure": lines.append(("No errors found", "OK")) else: lines.append(("Non zero exit code", "FAIL")) s3_helper = S3Helper("https://s3.amazonaws.com") report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME) print("::notice ::Report url: {report_url}") commit = get_commit(gh, pr_info.sha) commit.create_status(context=NAME, description=description, state=status, target_url=report_url) if status == "failure": sys.exit(1)