Example #1
def _extract(self):
    extractor = DataExtractor(self.project)
    extractor.extract(True)
    path = extractor.get_bugged_methods_path(self.version, True)
    df = pd.read_csv(path, sep=';')
    key = 'method_id'
    # Build {method_id: {"is_method_buggy": value}} from the per-method rows.
    bugged = df.groupby(key).apply(lambda x: dict(
        zip(["is_method_buggy"], x.is_method_buggy))).to_dict()
    self.data.set_raw_data(bugged)
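For readers unfamiliar with the groupby/apply idiom above, the following self-contained sketch uses synthetic data (not taken from the source CSV) to show what the resulting bugged mapping looks like, together with an equivalent, more direct construction:

import pandas as pd

# Synthetic stand-in for the CSV read above; only the two relevant columns.
df = pd.DataFrame({
    "method_id": ["m1", "m1", "m2"],
    "is_method_buggy": [True, True, False],
})

# Equivalent to the groupby/apply/zip expression: one entry per method_id,
# wrapping that method's first is_method_buggy value in a single-key dict.
bugged = {
    mid: {"is_method_buggy": flag}
    for mid, flag in df.groupby("method_id")["is_method_buggy"].first().items()
}
print(bugged)  # {'m1': {'is_method_buggy': True}, 'm2': {'is_method_buggy': False}}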
Example #2
import datetime
import unittest

# DataExtractor and raw_data are provided by the project under test (not shown in this snippet).
class DataExtractorTest(unittest.TestCase):
    def setUp(self):
        self.extractor = DataExtractor(raw_data)
        self.extractor.extract()
        self.race = self.extractor.get_race()

    def test_extracts_heat(self):
        self.assertEqual(60, self.race.heat)

    def test_extracts_race_date_and_time(self):
        self.assertEqual(datetime.date(2011, 12, 23), self.race.date)
        self.assertEqual(datetime.time(20, 36), self.race.time)

    def test_extract_driver_list(self):
        drivers = [u'CiglaR', u'CASPER', u'Brzi', u'bR1ck', u'gogoGT', u'Shorty', u'dastrong', u'skrla', u'slavisha', u'VINKO']
        self.assertEqual(drivers, self.race.driver_list)
Example #3
import logging

def extract_data(project_ref):
    index, project = project_ref

    general_log = logging.getLogger(__name__)
    success_log = logging.getLogger("success")
    failure_log = logging.getLogger("failure")
    failure_verbose_log = logging.getLogger("failure_verbose")

    general_log.info(str(index) + ": " + project.github())
    try:
        extractor = DataExtractor(project)
        extractor.extract()
        success_log.info("Successfully extracted {0}.".format(project.github()))
    except Exception as e:
        failure_log.error("Failed to extract {0}.".format(project.github()))
        failure_verbose_log.exception("Failed to extract {0}.".format(
            project.github()))
        return e
    return
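The (index, project) tuple argument and the pattern of returning the exception instead of raising it suggest extract_data is meant to be mapped over an enumerated project list by a pool of workers. A minimal driver sketch under that assumption (the extract_all helper is not part of the source):

import multiprocessing

def extract_all(projects):
    # Each worker receives an (index, project) tuple from enumerate();
    # failures come back as exception objects rather than crashing the pool.
    with multiprocessing.Pool() as pool:
        results = pool.map(extract_data, enumerate(projects))
    # Keep only the failures for later inspection.
    return [result for result in results if result is not None]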
Example #4
def test_extract(self):
    project = ProjectName.CommonsLang.value
    extractor = DataExtractor(project)
    extractor.extract()
Example #5
from data_extractor import DataExtractor
# from model import build_model
from preprocess import write_to_file, preprocess_data
import sys

dataset_folder = sys.argv[1]
dataset_file = "dataset.json"
normalised_dataset_file = "normalised_data.json"

# extract data from review.json and business.json
data_extractor = DataExtractor(dataset_folder)
data_extractor.extract()
data_extractor.write_to_file()

# preprocess data and write final datasets to normalised_data.json
preprocess_data(dataset_file)

# build model
# build_model(normalised_dataset_file)
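As written, the script takes the dataset folder as its single command-line argument (sys.argv[1]), so it would be invoked as, for example, python extract.py /path/to/dataset, where the folder is expected to contain the review.json and business.json files mentioned in the comment above; the script name here is only illustrative.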