Example #1
0
 def test_persist_channels_contain_something_in_files(self):
     """Check that every file persisted under the name 'test' is non-empty.

     Persists an empty channel array under the name 'test', then opens each
     entry of ./data/processed whose name contains 'test' and asserts that
     it has at least one line.
     """
     MetricsCalculation.persist_data({'X': np.array([])}, 'test')
     path_to_directory = "./data/processed"
     # Plain substring check: the former pattern '.*test*' only required
     # 'tes' (its trailing '*' quantified the final 't'), so unrelated
     # files could be picked up as well.
     filtered_contents = [name for name in os.listdir(path_to_directory)
                          if 'test' in name]
     # Guard against a vacuous pass: with no matching files the loop below
     # would never execute a single assertion.
     assert filtered_contents, "no 'test' files found in " + path_to_directory
     for filename in filtered_contents:
         with open(os.path.join(path_to_directory, filename), 'r') as f:
             assert f.readlines(), filename + " is empty"
Example #2
0
    def test_persist_parameters_all_files_contain_same_data(self):
        """Check that the three datasets loaded for 'test' hold the same values.

        Persists ``{'a': 0}`` under the name 'test', hands the matching
        entries of ./data/processed to ``load_files`` and compares the three
        results (bound here as csv, txt and json data) after casting the
        txt and csv values to ``int``.
        """
        MetricsCalculation.persist_data({'a': 0}, 'test')
        path_to_directory = "./data/processed"
        # Plain substring check: the former pattern '.*test*' only required
        # 'tes' (its trailing '*' quantified the final 't'), so unrelated
        # files could be handed to load_files as well.
        filtered_contents = [name for name in os.listdir(path_to_directory)
                             if 'test' in name]

        csv_data, txt_data, json_data = load_files(path_to_directory,
                                                   filtered_contents)
        # txt values are cast to int so they compare equal to the json values.
        txt_data = {k: int(v) for k, v in txt_data.items()}
        assert txt_data == json_data
        for k in json_data:
            assert json_data[k] == int(csv_data[k])
            assert txt_data[k] == int(csv_data[k])
Example #3
0
 def test_persist_incorrect_channels_does_not_creates_files(self):
     """Check that persisting a string channel value leaves no 'test' files.

     Calls ``persist_data`` with ``{'X': 'incorrect_data'}`` under the name
     'test' and asserts that ./data/processed has no entry whose name
     contains 'test'.
     """
     MetricsCalculation.persist_data({'X': 'incorrect_data'}, 'test')
     # Plain substring check: the former pattern '.*test*' only required
     # 'tes' (its trailing '*' quantified the final 't'), so an unrelated
     # file could make this test fail.
     matching = [name for name in os.listdir("./data/processed")
                 if 'test' in name]
     assert not matching
Example #4
0
 def test_persist_channels_successfully_creates_files(self):
     """Check that persisting a channel array yields exactly three 'test' files.

     Calls ``persist_data`` with an empty array under the name 'test' and
     counts the entries of ./data/processed whose name contains 'test'.
     """
     MetricsCalculation.persist_data({'X': np.array([])}, 'test')
     # Plain substring check: the former pattern '.*test*' only required
     # 'tes' (its trailing '*' quantified the final 't'), so unrelated
     # files could inflate the count.
     matching = [name for name in os.listdir("./data/processed")
                 if 'test' in name]
     assert len(matching) == 3
Example #5
0
# Metrics collection: continue from the existing performance log when the
# file is present, otherwise start from an empty frame.
performance_csv = './data/performance/performance_metrics.csv'
if os.path.isfile(performance_csv):
    metrics = pd.read_csv(performance_csv)
else:
    metrics = pd.DataFrame()

channels, parameters, metrics = DataIngestor.ingest_data(
    './data/channels.txt', './data/parameters.txt', metrics)
channels, parameters, metrics = MetricsCalculation.calculate_metrics(
    channels, parameters, metrics)

# Performance metrics gathering for data persistence. The clock is read
# unconditionally so `start` is always bound, whatever `metrics` is.
start = time.time()

MetricsCalculation.persist_data(channels, 'channels')
MetricsCalculation.persist_data(parameters, 'parameters')

elapsed = time.time() - start

# NOTE(review): the steps above may hand back metrics=None (presumably when
# performance tracking is switched off - confirm); nothing is recorded then.
if metrics is not None:
    # DataFrame.append was removed in pandas 2.0; pd.concat with a one-row
    # frame adds the same record and also works on older pandas versions.
    metrics = pd.concat(
        [
            metrics,
            pd.DataFrame([{
                'key': 'metrics_persisting',
                'value': elapsed,
            }]),
        ], ignore_index=True)
    metrics.to_csv(performance_csv, index=False)