def __init__(self, pipe=None, flush=False):
    # rebuild only when a flush is requested or nothing could be read from disk
    if flush or not self.read():
        traindata, testdata = pipe()
        # create data points and sort the lists
        self.trainset = self.sort([Datapoint(item) for item in traindata])
        self.testset = self.sort([Datapoint(item) for item in testdata])
        # save to disk
        self.write()
def test_unique_constraint_acting_on_insert(self):
    with self.assertRaises(Exception):
        self.session.add(Datapoint(**self.datapoint1_values))
        self.session.commit()
        self.session.close()
    self.session.rollback()
def __init__(self, dictionary, schema=None, attr_schema=None, save_loc="", error_loc=""):
    """
    Initializes:
        name
        save_path and error_path
        attributes and data
        logger
        schema (declarations and accessors)
        attr_schema

    Logs errors from:
        schema initialization validation
        attr_schema initialization validation
    """
    import _dataset_initializer as di
    from copy import deepcopy as dc
    from utils import Logger

    self.name = di.get_name(dictionary)
    self.save_path = di.get_save_path(dictionary, save_loc, name=self.name)
    self.error_path = di.get_error_path(dictionary, error_loc, name=self.name)
    self.attributes = dc(dictionary["attributes"])
    self.data = dc(dictionary["data"])
    self.logger = Logger(self.error_path)

    # SCHEMA
    self.schema = di.get_schema(dictionary, schema)
    self.declarations = self.schema.declarations
    self.accessors = self.schema.accessors
    self.editors = self.schema.editors
    for entry in self.data:
        self.schema.validate(entry)
    self.logger.log(self.schema.logger.errors, error_type="Schema")

    # ATTRIBUTE SCHEMA
    self.attr_schema = di.get_attr_schema(dictionary, attr_schema)
    self.attr_schema.validate(self.attributes)
    self.logger.log(self.attr_schema.logger.errors, error_type="AttrSchema")

    # DATAPOINTS (materialized as a list rather than a lazy map object)
    from datapoint import Datapoint
    self.datapoints = [
        Datapoint(entry, self.declarations, self.accessors, self.editors)
        for entry in self.data
    ]
def setUp(self):
    # call create_all
    super(FilledDatabase, self).setUp()
    # datapoints
    self.datapoint1_values = dict(date="2014-03-31", freq='q', name="CPI_rog", value=102.3)
    self.datapoint2_values = dict(date="2017-03-16", freq='d', name="BRENT", value=50.56)
    x1 = Datapoint(**self.datapoint1_values)
    x2 = Datapoint(**self.datapoint2_values)
    # add and close session
    for x in x1, x2:
        self.session.add(x)
    self.session.commit()
    self.session.close()
def convertToCommonX(datapoints, xUnit):
    scaleMultiplier = xunitguesser.guessRatio(xUnit)
    if scaleMultiplier <= 0.0:
        raise SignalTraceError('Nonsensical X axis unit multiplier')

    converted = []
    for pt in datapoints:
        converted.append(Datapoint(pt.x * scaleMultiplier, pt.y))
    return converted
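# Hedged usage sketch (not part of the original source): assumes
# xunitguesser.guessRatio() maps a unit label such as 'ms' to a positive
# multiplier for the common X unit, and Datapoint(x, y) as used above.
def _demo_convertToCommonX():
    trace = [Datapoint(0.5, 1.0), Datapoint(1.0, 2.0)]
    # every x is scaled by the guessed ratio; y values are untouched
    return convertToCommonX(trace, 'ms')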
def test_duplicates_not_allowed_on_insert(self):
    # this datapoint already exists in the database
    datapoint = Datapoint(**self.datapoint1_values)
    with self.assertRaises(Exception):
        self.session.add(datapoint)
        self.session.commit()
        self.session.close()
    self.session.rollback()
def upsert_one(session_factory, condition, new_value):
    with scope(session_factory) as session:
        session.expire_on_commit = False
        result = session.query(Datapoint).filter_by(**condition).first()
        if result is None:
            session.add(Datapoint(**condition, value=new_value))
            return True
        else:
            if result.value != new_value:
                result.value = new_value
                return True
            return False
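# Hedged usage sketch (not part of the original source): assumes `scope` yields
# a SQLAlchemy session from `session_factory`, as in the __main__ block further
# below, and that Datapoint rows are keyed by date/freq/name.
def _demo_upsert(session_factory):
    condition = dict(date="2014-03-31", freq='q', name="CPI_rog")
    # True if the row was inserted or its value changed, False if already identical
    return upsert_one(session_factory, condition, new_value=102.5)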
def scale(datapoints, scaleFrom, scaleTo):
    origin = datapoints[0].x
    shift = scaleFrom - datapoints[0].x
    stretchFactor = (scaleTo - scaleFrom) / (datapoints[-1].x - datapoints[0].x)

    scaled = []
    for pt in datapoints:
        distance = pt.x - datapoints[0].x
        newX = distance * stretchFactor + shift + origin
        scaled.append(Datapoint(newX, pt.y))
    return scaled
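# Hedged usage sketch (not part of the original source): maps the x-range of a
# short trace onto 0..100; assumes Datapoint(x, y) as used in scale() above.
def _demo_scale():
    trace = [Datapoint(10.0, 0.1), Datapoint(15.0, 0.4), Datapoint(20.0, 0.2)]
    # x values become 0.0, 50.0, 100.0; y values are unchanged
    return scale(trace, scaleFrom=0.0, scaleTo=100.0)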
def _applyCropping(self):
    datapoints = []
    x = self._cropFrom
    xStep = 1.0 / self._avgSR

    # pad with the prepend value from the crop start up to the first source point
    while x < self._srcDatapoints[0].x:
        datapoints.append(Datapoint(x, self._prepend))
        x += xStep

    # keep the source datapoints that fall inside the cropping range
    for pt in self._srcDatapoints:
        if pt.x > self._cropTo:
            break
        if pt.x >= self._cropFrom:
            datapoints.append(pt)

    # pad with the append value from the last source point up to the crop end
    x = self._srcDatapoints[-1].x
    while x <= self._cropTo:
        datapoints.append(Datapoint(x, self._append))
        x += xStep

    return datapoints
def _payloadToSignal(block):
    path = block[0]
    dataID = block[1]
    xTitle = block[3]
    xUnit = block[5]
    yTitle = block[4]
    yUnit = block[6]

    datapoints = []
    for pt in block[7]:
        datapoints.append(Datapoint(x=pt[0], y=pt[1]))

    return SignalTrace(datapoints, path, dataID, xTitle, xUnit, yTitle, yUnit)
def test_database_has_new_datapoint_after_it_has_been_inserted(self):
    # tests that inserting a new unique row actually works
    new_datapoint = Datapoint(**self.non_existed_datapoint_values)
    self.session.add(new_datapoint)
    self.session.commit()
    self.session.close()

    condition = get_datapoint_condition(self.non_existed_datapoint_values)
    count = self.session.query(Datapoint) \
        .filter_by(**condition) \
        .count()
    assert count == 1
def __extract_data_point(self, line):
    line_params = line.split(delimiter)
    params = {}
    for key, value in zip(self.header, line_params):
        key = map_file_header(key)
        if '\n' in value:
            # str.replace returns a new string; assign it back
            value = value.replace('\n', '')
        params[key] = value
    dp = Datapoint(params)
    print(f'{len(params)} params read for data point {dp.uid}')
    return dp
def test_database_has_new_datapoint_with_proper_value_after_it_has_been_inserted(self):
    # tests that inserting a new unique row actually works (value testing)
    new_datapoint = Datapoint(**self.non_existed_datapoint_values)
    self.session.add(new_datapoint)
    self.session.commit()
    self.session.close()

    condition = get_datapoint_condition(self.non_existed_datapoint_values)
    result = self.session.query(Datapoint) \
        .filter_by(**condition) \
        .one()
    assert result.value == self.non_existed_datapoint_values["value"]
def get_aws_metric_statistics(self, instance_id, instance_value, metric_name,
                              period, start_time, end_time, namespace, statistics):
    cloudwatch = boto3.client('cloudwatch')
    # start_time / end_time arrive as 'YYYY-MM-DD' strings; anchor them to midnight UTC
    start_time = start_time + 'T00:00:00Z'
    end_time = end_time + 'T00:00:00Z'
    start_time = datetime.strptime(start_time, '%Y-%m-%dT%H:%M:%SZ')
    end_time = datetime.strptime(end_time, '%Y-%m-%dT%H:%M:%SZ')

    response = cloudwatch.get_metric_statistics(
        Namespace=namespace,
        Dimensions=[
            {
                'Name': instance_id,
                'Value': instance_value
            }
        ],
        MetricName=metric_name,
        StartTime=start_time,
        EndTime=end_time,
        Period=period,
        Statistics=[statistics]
    )

    results = response["Datapoints"]
    datapoints = []
    for row in results:
        start_time = row["Timestamp"].strftime("%Y-%m-%d %H:%M:%S")
        start = datetime.strptime(start_time, "%Y-%m-%d %H:%M:%S")
        end = start + timedelta(seconds=period)
        end = end.strftime("%Y-%m-%d %H:%M:%S")
        value = row["Average"]  # assumes statistics == 'Average'
        datapoint = Datapoint(value, start_time, end)
        datapoints.append(datapoint)
    return datapoints
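# Hedged usage sketch (not part of the original source): `collector` stands for
# whatever object this method is defined on, and the dimension name, instance id
# and dates are placeholders; boto3 credentials are assumed to be configured.
# Statistics='Average' matches the row["Average"] lookup above.
def _demo_get_cpu_average(collector):
    return collector.get_aws_metric_statistics(
        instance_id='InstanceId',
        instance_value='i-0123456789abcdef0',
        metric_name='CPUUtilization',
        period=3600,
        start_time='2017-09-01',
        end_time='2017-09-02',
        namespace='AWS/EC2',
        statistics='Average')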
def lvq_classify():
    # draw two Gaussian clusters, label them 0 and 1
    mean1 = [4, 5]
    mean2 = [3, 2]
    scale1 = 1
    scale2 = 1
    numSamples = 5

    dps = []
    for ii in range(numSamples * 2):
        dps.append(Datapoint([rnd.normal(loc=mean1[0], scale=scale1),
                              rnd.normal(loc=mean1[1], scale=scale1)], 0))
        dps.append(Datapoint([rnd.normal(loc=mean2[0], scale=scale2),
                              rnd.normal(loc=mean2[1], scale=scale2)], 1))

    plot1 = plt.figure(1)
    printDatapoints2d(dps, plot1)

    prototypes = lvq_train(dps)
    plot2 = plt.figure(2)
    printDatapoints2d(prototypes, plot2)

    xrange = numpy.linspace(0, 10, 20)
    yrange = numpy.linspace(0, 10, 20)
def get_datapoint_condition(datapoint):
    return {k: v for k, v in datapoint.items() if k != "value"}


if __name__ == '__main__':
    engine = create_engine()
    create_tables(engine)
    session_factory = create_session_factory(engine)

    # sample data for datapoints
    d1 = dict(date="2014-03-31", freq='q', name="CPI_rog", value=102.3)
    d1_bis = dict(date="2014-03-31", freq='q', name="CPI_rog", value=102.0)
    d2 = dict(freq='m', name='BRENT', date='2017-09-20', value=50.25)

    # insert some non-existing datapoints
    is_inserted_1 = insert_one(session_factory, Datapoint(**d1))
    is_inserted_2 = insert_one(session_factory, Datapoint(**d2))
    assert (is_inserted_1 and is_inserted_2)

    # find a datapoint by date-freq-name
    condition = dict(date="2014-03-31", freq='q', name="CPI_rog")
    found = find_by(session_factory, condition)
    assert isinstance(found, list)
    assert found[0].id > 0
    found[0].id = None
    assert found[0] == Datapoint(freq='q', name='CPI_rog', date='2014-03-31',
                                 value=102.3)

    _ = find_by(session_factory,
def addImage(self, image, count, char_type, char_index):
    d = Datapoint(image, count, char_type, char_index)
    self.darray[char_index].append(d)