예제 #1
0
 def test_validate_pypi_content():
     """Exercise BQValidation.validate_pypi with str, list, set, frozenset and dict input."""
     validator = BQValidation()

     # Plain string: nothing other than the given name may come back.
     single = 'flask'
     returned = set(validator.validate_pypi(single))
     assert not returned.difference([single])

     # List that also contains 'unknownpkg': both known names must be returned.
     mixed = ['flask', 'django', 'unknownpkg']
     expected = {'flask', 'django'}
     assert not expected.difference(validator.validate_pypi(mixed))

     # set and frozenset input: every supplied name must be in the result.
     for names in ({'flask', 'django'}, frozenset(['flask', 'django'])):
         assert not names.difference(validator.validate_pypi(names))

     # A dict argument is expected to raise ValueError.
     with pytest.raises(ValueError):
         validator.validate_pypi({"name": "flask"})
예제 #2
0
    def process(self, validate=False):
        """Run the PyPI BigQuery query and tally the package sets it returns.

        Each result row's 'content' is parsed with pip_req.parse_requirements;
        the sorted, de-duplicated packages are joined into one string that is
        counted in self.counter. The final counts are handed to
        update_s3_bucket under the 'pypi' key.

        :param validate: when true, the parsed packages are additionally passed
            through BQValidation.validate_pypi before being counted.
        """
        validator = BQValidation()
        logger.info("Running Bigquery for pypi synchronously")
        self.big_query_instance.run_query_sync()
        run_started = time.monotonic()

        for row_number, row in enumerate(self.big_query_instance.get_result()):
            row_started = time.monotonic()
            content = row.get('content')
            if not content:
                # Rows without content are skipped without any logging.
                continue

            packages = []
            try:
                packages = sorted(set(pip_req.parse_requirements(content)))
                if validate:
                    packages = sorted(validator.validate_pypi(packages))
            except Exception as _exc:
                # Best effort: a failing manifest is logged and the loop goes on.
                logger.error("IGNORE: {}".format(_exc))
                logger.error(
                    "Failed to parse content data {}".format(content))

            if packages:
                # The whole package combination is counted as a single key.
                pkg_string = ', '.join(packages)
                logger.info("PACKAGES: {}".format(pkg_string))
                self.counter.update([pkg_string])

            elapsed = time.monotonic() - row_started
            logger.info("Processed content in time: {} counter:{}".format(
                elapsed, row_number))

        total_elapsed = time.monotonic() - run_started
        logger.info(
            "Processed All the manifests in time: {}".format(total_elapsed))

        logger.info("updating file content")
        payload = {'pypi': dict(self.counter.most_common())}
        self.update_s3_bucket(data=payload,
                              bucket_name=self.bucket_name,
                              filename=self.filename)

        logger.info("Succefully Processed the PyPiBigQuery")
예제 #3
0
class PypiCollector(BaseCollector):
    """Collector that extracts dependencies from Pypi manifests."""

    def __init__(self):
        """Initialize the base collector with 'pypi' and create the BQ validation helper."""
        super().__init__('pypi')
        self.bq_validation = BQValidation()

    def parse_and_collect(self, content, validate):
        """Parse the manifest content and pass the resulting packages to the counter.

        When ``validate`` is true the parsed packages are also run through
        ``validate_pypi``. Any exception is logged as a warning and whatever
        value ``packages`` holds at that point (``None`` if parsing itself
        failed) is still forwarded to ``_update_counter``.
        """
        packages = None
        try:
            parsed = set(pip_req.parse_requirements(content))
            packages = sorted(parsed)
            if validate:
                validated = self.bq_validation.validate_pypi(packages)
                packages = sorted(validated)
        except Exception as e:
            logger.warning('Error in content, it raises %s', e)

        self._update_counter(packages)
예제 #4
0
import numpy as np
import hpfrec
import json
import logging
import subprocess
from src.config.path_constants import (PACKAGE_TO_ID_MAP,
                                       MANIFEST_TO_ID_MAP, MANIFEST_PATH, HPF_MODEL_PATH, ECOSYSTEM,
                                       HYPERPARAMETERS_PATH, MODEL_VERSION)
from src.config.cloud_constants import (S3_BUCKET_NAME,
                                        AWS_S3_SECRET_KEY_ID, AWS_S3_ACCESS_KEY_ID, GITHUB_TOKEN)

# Configure the root logger with the logging module's default setup.
logging.basicConfig()
# getLogger() without a name returns the root logger, so the INFO level
# set below applies to it.
_logger = logging.getLogger()
_logger.setLevel(logging.INFO)

# Module-level validator instance, created when this module is imported.
# NOTE(review): BQValidation is not among the imports visible in this excerpt; confirm it is imported.
bq_validator = BQValidation()


def load_s3():  # pragma: no cover
    """Connect to the configured S3 bucket and return the connected client.

    Raises Exception when the connection is not established.
    """
    s3_client = AmazonS3(
        bucket_name=S3_BUCKET_NAME,
        aws_access_key_id=AWS_S3_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_S3_SECRET_KEY_ID,
    )
    s3_client.connect()

    # Fail fast when the connection did not come up.
    if not s3_client.is_connected():
        raise Exception("S3 Connection Failed")

    _logger.info("S3 connection established.")
    return s3_client
예제 #5
0
 def __init__(self):
     """Initialize the parent class with 'pypi' and create the BQ validation helper."""
     # The parent initializer receives the literal 'pypi'.
     super().__init__('pypi')
     # Store a BQValidation instance on the object.
     self.bq_validation = BQValidation()