예제 #1
0
 def __init__(self, data_obj, df_=None):
     """Create an instance for PreprocessData.

     Args:
         data_obj: Data-store accessor. It must provide
             ``load_existing_data()`` and is also passed to ``GetKeywords``.
         df_: Optional DataFrame stored as ``self.df_``. When omitted (or
             ``None``), a fresh empty ``pd.DataFrame`` is created for this
             instance.
     """
     # A ``pd.DataFrame()`` default in the signature is evaluated once, at
     # definition time, and would be shared (and mutated) across instances.
     if df_ is None:
         df_ = pd.DataFrame()
     self.get_data_obj = data_obj
     self.utility_obj = Utility()
     self.get_keywords_obj = GetKeywords(data_obj)
     self.df_ = df_
     # Both calls below run eagerly during construction.
     self.existing_data = self.get_data_obj.load_existing_data()
     self.pkg_kwd_df = self.fetch_package_keywords()
예제 #2
0
 def __init__(self,
              aws_access_key_id='',
              aws_secret_access_key='',
              aws_bucket_name='cvae-insights',
              model_version='',
              num_train_per_user=5):
     """Create an instance for GetPreprocessData.

     All arguments are forwarded unchanged to ``GetData``; the resulting
     object is then shared with ``GetKeywords`` and ``PreprocessData``.
     ``num_train_per_user`` is additionally kept as ``self.num_users``.
     """
     data_store_options = {
         'aws_access_key_id': aws_access_key_id,
         'aws_secret_access_key': aws_secret_access_key,
         'aws_bucket_name': aws_bucket_name,
         'model_version': model_version,
         'num_train_per_user': num_train_per_user,
     }
     data_store = GetData(**data_store_options)

     # The same GetData instance backs both helper objects.
     self.obj_ = data_store
     self.keyword_obj_ = GetKeywords(data_store)
     self.preprocess_data_obj = PreprocessData(data_obj=data_store)
     self.utils = Utility()
     self.num_users = num_train_per_user
예제 #3
0
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import json
import pandas as pd
from training.datastore.get_keywords import GetKeywords
import unittest

# Module-level GetKeywords instance used by the test case in this module.
kws_obj = GetKeywords()

# Load the package-details fixture once at import time (path is relative to
# the working directory). JSON text is UTF-8, so decode it explicitly rather
# than relying on the platform's default encoding.
with open(
        'tests/test_data/npm/dev/2019-01-03/data/test-node-package-details-with-url.json',
        encoding='utf-8'
) as f:
    test_data_df = pd.DataFrame(json.load(f))


class TestGetKeywords(unittest.TestCase):
    """This class tests the Get Keywords Class."""

    def test_from_existing_df(self):
        """Test extraction of data from existing dataframe."""
        test_package = 'algorithm'
        test_data = kws_obj.from_existing_df(test_data_df, test_package)
        # Use the TestCase assertion: a bare ``assert`` is stripped under
        # ``python -O`` and does not report the compared values on failure.
        self.assertEqual(len(test_data), 4)
예제 #4
0
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import json
import pandas as pd
from training.datastore.get_keywords import GetKeywords
import unittest

# Module-level GetKeywords instance used by the test case in this module.
kws_obj = GetKeywords(local_data_store=True)

# Load the package-details fixture once at import time (path is relative to
# the working directory). JSON text is UTF-8, so decode it explicitly rather
# than relying on the platform's default encoding.
with open('tests/test_data/2019-01-03/data/node-package-details-with-url.json',
          encoding='utf-8') as f:
    test_data_df = pd.DataFrame(json.load(f))


class TestGetKeywords(unittest.TestCase):
    """This class tests the Get Keywords Class."""

    def test_from_existing_df(self):
        """Test extraction of data from existing dataframe."""
        test_package = 'algorithm'
        test_data = kws_obj.from_existing_df(test_data_df, test_package)
        # Use the TestCase assertion: a bare ``assert`` is stripped under
        # ``python -O`` and does not report the compared values on failure.
        self.assertEqual(len(test_data), 4)

    def test_get_version(self):
예제 #5
0
Copyright © 2018 Red Hat Inc.

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
from training.datastore.s3_connection import GetData
from training.datastore.get_keywords import GetKeywords
from training.datastore.preprocess_data import PreprocessData
from rudra import logger

# Instantiate the data-store, keyword and preprocessing helpers with their
# default arguments. This all runs at import time.
obj_ = GetData()
# NOTE(review): keyword_obj_ is not referenced again in the visible part of
# this script - confirm whether it is still needed.
keyword_obj_ = GetKeywords()
preprocess_data_obj = PreprocessData()
raw_data = obj_.load_raw_data()
# Presumably raw_data is dict-like; a missing 'package_list' key yields [].
# NOTE(review): packages is not used below in the visible code.
packages = raw_data.get('package_list', [])
# update_pkg_tag_map() is unpacked into exactly three values.
package_tag_map, vocabulary, manifest_user_data = preprocess_data_obj.update_pkg_tag_map()
# Log each of the three results in full at info level.
logger.info("Package Tag Map is: {}".format(package_tag_map))
logger.info("Vocabulary is: {}".format(vocabulary))
logger.info("Manifest User Data is: {}".format(manifest_user_data))
예제 #6
0
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import json
import pandas as pd
from training.datastore.get_keywords import GetKeywords
import unittest

# Module-level GetKeywords instance used by the test case in this module;
# it is built without a backing data object.
kws_obj = GetKeywords(data_obj=None)

# Load the package-details fixture once at import time (path is relative to
# the working directory). JSON text is UTF-8, so decode it explicitly rather
# than relying on the platform's default encoding.
with open(
        'tests/test_data/training-utils/test-node-package-details-with-url.json',
        encoding='utf-8'
) as f:
    test_data_df = pd.DataFrame(json.load(f))


class TestGetKeywords(unittest.TestCase):
    """This class tests the Get Keywords Class."""

    def test_from_existing_df(self):
        """Test extraction of data from existing dataframe."""
        test_package = 'algorithm'
        test_data = kws_obj.from_existing_df(test_data_df, test_package)
        # Use the TestCase assertion: a bare ``assert`` is stripped under
        # ``python -O`` and does not report the compared values on failure.
        self.assertEqual(len(test_data), 4)