def test_validate_pypi_content():
    """Exercise ``validate_pypi`` with str, list, set and frozenset input.

    A dict argument is expected to be rejected with ``ValueError``.
    """
    validator = BQValidation()

    # A single name passed as a plain string: nothing but that name may
    # come back.
    single = 'flask'
    assert set(validator.validate_pypi(single)).issubset([single])

    # A list that also carries an extra name: the two expected names must
    # both be present in the result.
    mixed = ['flask', 'django', 'unknownpkg']
    expected = {'flask', 'django'}
    assert expected.issubset(validator.validate_pypi(mixed))

    # Mutable and immutable sets: every supplied name must be present in
    # the result.
    for container in (set, frozenset):
        names = container(['flask', 'django'])
        assert names.issubset(validator.validate_pypi(names))

    # A dict is not an accepted input type.
    with pytest.raises(ValueError):
        validator.validate_pypi({"name": "flask"})
def process(self, validate=False):
    """Process Pypi Bigquery response data.

    Runs the BigQuery job synchronously, parses the requirement names out
    of the ``content`` field of every result row, counts each distinct
    sorted, comma-joined package list in ``self.counter`` and finally
    hands the counts to ``self.update_s3_bucket``.

    When ``validate`` is true the parsed names are passed through
    ``BQValidation.validate_pypi`` and only its result is counted. If
    either parsing or validation raises, the row is logged and skipped.
    """
    bq_validation = BQValidation()
    logger.info("Running Bigquery for pypi synchronously")
    self.big_query_instance.run_query_sync()

    start_process_time = time.monotonic()
    for idx, obj in enumerate(self.big_query_instance.get_result()):
        start = time.monotonic()
        content = obj.get('content')
        packages = []
        if content:
            try:
                parsed = sorted(set(pip_req.parse_requirements(content)))
                if validate:
                    parsed = sorted(bq_validation.validate_pypi(parsed))
            except Exception as _exc:
                logger.error("IGNORE: %s", _exc)
                logger.error("Failed to parse content data %s", content)
            else:
                # Commit only after every step succeeded, so a failing
                # validation can never leave unvalidated names behind to
                # be counted.
                packages = parsed
        if packages:
            pkg_string = ', '.join(packages)
            logger.info("PACKAGES: %s", pkg_string)
            self.counter.update([pkg_string])
        logger.info("Processed content in time: %s counter:%s",
                    time.monotonic() - start, idx)

    logger.info("Processed All the manifests in time: %s",
                time.monotonic() - start_process_time)
    logger.info("updating file content")
    self.update_s3_bucket(data={'pypi': dict(self.counter.most_common())},
                          bucket_name=self.bucket_name,
                          filename=self.filename)
    logger.info("Succefully Processed the PyPiBigQuery")
class PypiCollector(BaseCollector):
    """Handle Pypi manifests and extract dependencies."""

    def __init__(self):
        """Initialize the base collector with 'pypi' and create the BQ validator."""
        super().__init__('pypi')
        self.bq_validation = BQValidation()

    def parse_and_collect(self, content, validate):
        """Parse dependencies and add them to the collection.

        The requirement names parsed from ``content`` are de-duplicated and
        sorted. When ``validate`` is true they are additionally passed
        through ``validate_pypi`` and its result is sorted. The outcome is
        handed to ``_update_counter``; if parsing or validation raises, a
        warning is logged and ``None`` is handed over instead.
        """
        packages = None
        try:
            parsed = sorted(set(pip_req.parse_requirements(content)))
            if validate:
                parsed = sorted(self.bq_validation.validate_pypi(parsed))
        except Exception as e:
            logger.warning('Error in content, it raises %s', e)
        else:
            # Commit only after every step succeeded, so a failing
            # validation cannot leak the unvalidated names to the counter.
            packages = parsed
        self._update_counter(packages)
import numpy as np
import hpfrec
import json
import logging
import subprocess
from src.config.path_constants import (PACKAGE_TO_ID_MAP, MANIFEST_TO_ID_MAP,
                                       MANIFEST_PATH, HPF_MODEL_PATH, ECOSYSTEM,
                                       HYPERPARAMETERS_PATH, MODEL_VERSION)
from src.config.cloud_constants import (S3_BUCKET_NAME, AWS_S3_SECRET_KEY_ID,
                                        AWS_S3_ACCESS_KEY_ID, GITHUB_TOKEN)

# Configure the root logger once at import time and log at INFO level.
logging.basicConfig()
_logger = logging.getLogger()
_logger.setLevel(logging.INFO)

# Module-wide validator instance.
bq_validator = BQValidation()


def load_s3():  # pragma: no cover
    """Connect to the configured S3 bucket and return the connected client.

    Raises ``Exception`` when the client does not report a connection
    after ``connect()``.
    """
    s3_client = AmazonS3(
        bucket_name=S3_BUCKET_NAME,
        aws_access_key_id=AWS_S3_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_S3_SECRET_KEY_ID,
    )
    s3_client.connect()

    # Fail fast when the connection did not come up.
    if not s3_client.is_connected():
        raise Exception("S3 Connection Failed")

    _logger.info("S3 connection established.")
    return s3_client
def __init__(self):
    """Initialize the parent with 'pypi' and create the BQ validator.

    Calls the parent initializer with ``'pypi'`` and stores a new
    ``BQValidation`` instance on ``self.bq_validation``.
    """
    super().__init__('pypi')
    self.bq_validation = BQValidation()