Esempio n. 1
0
 def cleanDataPoints (filename, dataPoints, numSteps):
     """Fill gaps so every series has a point for each step in [0, numSteps).

     For each series in ``dataPoints`` the steps are visited in order. When the
     point at position ``step`` carries a different time, a new point is
     inserted there that copies the concentration of the preceding entry; when
     the series is too short, a point copying the last concentration is
     appended. The mapping is modified in place and also returned.
     ``filename`` is not used.

     NOTE: at step 0 the "preceding entry" is ``series[-1]`` (the last
     element), and an empty series raises IndexError.
     """
     for key in dataPoints:
         series = dataPoints[key]
         for step in range(numSteps):
             if step >= len(series):
                 # Ran off the end: repeat the most recent concentration.
                 series.append(DataPoint(step, series[-1].getConcentration()))
                 continue
             if series[step].getTime() == step:
                 continue
             # A step is missing here: reuse the concentration just before it.
             series.insert(step, DataPoint(step, series[step - 1].getConcentration()))
     return dataPoints
Esempio n. 2
0
 def __init__(self, X_train, Y_train, k):
     """Store the training data and ``k``, and pair up the samples.

     ``self.datapoints`` receives one ``DataPoint(x, y)`` for each pair
     produced by zipping ``X_train`` with ``Y_train``.
     """
     self.X_train, self.Y_train, self.k = X_train, Y_train, k
     self.datapoints = []
     self.datapoints.extend(
         DataPoint(features, label)
         for features, label in zip(self.X_train, self.Y_train))
Esempio n. 3
0
    def update(self):
        """Append the time gap between each consecutive pair of new samples.

        Every appended DataPoint is stamped with the later sample's time and
        holds ``later.time - earlier.time`` as its value. Fewer than two new
        samples produce nothing.
        """
        samples = self.get_new_data()

        for idx in range(1, len(samples)):
            earlier, later = samples[idx - 1], samples[idx]
            gap = later.time - earlier.time
            self.data.append(DataPoint(time=later.time, value=gap))
Esempio n. 4
0
    def update(self):
        """Filter the newest window of source samples and append a slice of it.

        Nothing happens until the source holds ``WINDOW_SIZE`` samples, nor
        when the sample at ``-WINDOW_SIZE + START_INDEX`` is not newer than the
        last point already stored. Otherwise the window's values are passed
        through ``butter_bandpass_filter`` and the results at indices
        ``START_INDEX`` up to (excluding) ``STOP_INDEX`` are appended.
        """
        owner = type(self)
        source = self.data_source.data

        if len(source) < owner.WINDOW_SIZE:
            return

        if len(self.data):
            first_candidate = source[-owner.WINDOW_SIZE + owner.START_INDEX]
            if first_candidate.time <= self.data[-1].time:
                # This part of the window has already been emitted.
                return

        window = source[-owner.WINDOW_SIZE:]
        data_samples, time_samples = zip(
            *((dp.value, dp.time) for dp in window))

        frequency = owner.calculate_frequency(window)
        filtered_data = owner.butter_bandpass_filter(
            data_samples, frequency, self.lowcut, self.highcut,
            self.order_lowcut, self.order_highcut)

        assert len(time_samples) == len(filtered_data)

        self.data += [
            DataPoint(time=time_samples[idx], value=filtered_data[idx])
            for idx in range(owner.START_INDEX, owner.STOP_INDEX)
        ]
Esempio n. 5
0
    def update(self):
        """Append the square of every source sample not yet mirrored in ``self.data``.

        The source samples from index ``len(self.data)`` onwards are treated
        as new; each yields a DataPoint with the same time and the squared
        value.
        """
        source = self.data_source.data
        if len(source) == 0:
            return

        pending = source[len(self.data):]
        squared = [
            DataPoint(time=sample.time, value=sample.value**2)
            for sample in pending
        ]
        self.data.extend(squared)
Esempio n. 6
0
    def processingComplete(self, dataPoint: DataPoint):
        """Persist a processed video's data point and refresh the UI.

        The data point is saved to storage, stored under its video path
        (replacing any previous entry), pushed to the dialog and the file
        table, and re-selected (without playing) if it is the current video.
        """
        print('Video', dataPoint.videoPath, 'has completed processing.')

        # NOTE(review): the object received here is presumably not the same
        # Python object that was previously stored for this path, so the
        # stored entry is replaced below rather than mutated in place.
        saveFeatures = self.ui.saveFeaturesCheckBox.isChecked()
        dataPoint.saveToStorage(self.storage, shouldSaveFeatures=saveFeatures)

        self.dataPoints[dataPoint.videoPath] = dataPoint
        self.dialog.updateState(self.dataPoints)
        self.updateFileTableForDataPointChange()

        if not self.isCurrentVideo(dataPoint):
            return
        self.setCurrentVideo(dataPoint, play=False)
 def generate_data_set(self, length: int):
     """Replace ``self.data_set`` with ``length`` random, colour-coded points.

     Each value is a random integer between 10 and ``self.window_height``
     (inclusive). Its colour has a green channel proportional to the value
     and a red channel that is the remainder of 255; blue is always 0.
     """
     self.data_set = []
     for _ in range(length):
         value = random.randint(10, self.window_height)
         green = int(value / self.window_height * 255)
         colour = RGB(255 - green, green, 0)
         self.data_set.append(DataPoint(value, colour))
def create_datapoint_objects(csv_name):
    """Read a comma-separated file and build one DataPoint per row.

    The first seven columns of each row are passed positionally to
    ``DataPoint``. No header row is skipped.

    Args:
        csv_name: Path of the CSV file to read.

    Returns:
        A list of DataPoint objects, one per row, in file order.

    Raises:
        OSError: if the file cannot be opened.
        IndexError: if a row has fewer than seven columns.
    """
    # newline="" lets the csv module handle line endings itself, so quoted
    # fields that contain embedded newlines are parsed correctly.
    with open(csv_name, newline="") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        return [
            DataPoint(row[0], row[1], row[2], row[3], row[4], row[5], row[6])
            for row in csv_reader
        ]
Esempio n. 9
0
 def loadVideosFromFolder(self, folder):
     """Create and register a DataPoint for every video found under ``folder``.

     When ``CONFIG.USE_PRECOMPUTED_FEATURES`` is set, a video is skipped
     unless its features file exists. The features path is the video path
     with 'videos' replaced by 'features' and '.avi' by '.pkl'.
     """
     for videoPath in self.storage.recursivelyFindVideosInFolder(folder):
         if CONFIG.USE_PRECOMPUTED_FEATURES:
             # Videos without a precomputed features file cannot be used in this mode.
             featuresPath = videoPath.replace('videos', 'features')
             featuresPath = featuresPath.replace('.avi', '.pkl')
             if not self.storage.fileExists(featuresPath):
                 continue

         dataPoint = DataPoint(videoPath, self.storage, self.labelsSaveFolder)
         self.dataPoints[dataPoint.videoPath] = dataPoint
         self.addToVideoList(dataPoint)
Esempio n. 10
0
    def add_data(self, time, value):
        """Record one raw sample, relative to the first sample kept.

        The first ``RawData.WARMUP_STEPS`` calls are only counted and then
        discarded. After that ``time`` is converted to an int and divided by
        1,000,000 (presumably microseconds to seconds; confirm with the
        caller), and made relative to the first post-warm-up sample. The
        sample is stored only when ``value`` lies within
        ``[RawData.MIN_VALUE, RawData.MAX_VALUE]``.
        """
        if self.warmup_counter < RawData.WARMUP_STEPS:
            self.warmup_counter += 1
            return

        timestamp = int(time) / 1000000
        reading = int(value)

        # The first sample after warm-up defines time zero, even if its
        # value is later rejected by the range check.
        if self.start_time is None:
            self.start_time = timestamp

        elapsed = timestamp - self.start_time

        if RawData.MIN_VALUE <= reading <= RawData.MAX_VALUE:
            self.data.append(DataPoint(time=elapsed, value=reading))
Esempio n. 11
0
    def update(self):
        """Append a heart-rate point for every source peak newer than the last output.

        For each new peak at position ``index`` the rate is computed by
        ``HeartRate.calculate_heart_rate`` from up to ``self.number_of_peaks``
        peaks preceding it (the peak itself is excluded from the slice).
        """
        peaks = self.data_source.data

        if len(self.data) and self.data[-1].time == peaks[-1].time:
            # The newest peak has already been processed.
            return

        last_time = self.data[-1].time if len(self.data) else 0

        for index, r_peak in enumerate(peaks):
            if not r_peak.time > last_time:
                continue

            if index > self.number_of_peaks:
                start_index = index - self.number_of_peaks
            else:
                start_index = 0

            rate = HeartRate.calculate_heart_rate(peaks[start_index:index])
            self.data.append(DataPoint(time=r_peak.time, value=rate))
            last_time = r_peak.time
Esempio n. 12
0
    def update(self):
        """Append one rmssd value for every unprocessed source sample.

        With ``window_size`` of None the window grows from the first sample up
        to the current one; otherwise it is the last ``window_size`` samples,
        and positions that do not yet have a full window are skipped. Each
        result is stamped with the time of the window's last sample.
        """
        start_index = self.get_start_index()
        source = self.data_source.data
        total = len(source)

        if start_index == total:
            return

        # ``end`` is the exclusive end of the window, i.e. sample index + 1.
        for end in range(start_index + 1, total + 1):
            if self.window_size is None:
                window = source[:end]
            elif end >= self.window_size:
                window = source[end - self.window_size:end]
            else:
                continue

            values = [data_point.value for data_point in window]
            self.data.append(
                DataPoint(time=window[-1].time,
                          value=type(self).rmssd(values)))
Esempio n. 13
0
    def update(self):
        """Filter the latest window in the frequency domain and append its centre sample.

        The last ``WINDOW_SIZE`` source samples have their mean removed and
        are transformed with an FFT. Each bin is multiplied by
        ``self.transfer_function`` evaluated at the frequency given by
        ``index_to_freq`` for that bin, and the result is transformed back.
        Only the real part of the centre sample is appended, stamped with the
        centre sample's time.
        """
        owner = type(self)
        if len(self.data_source.data) < owner.WINDOW_SIZE:
            return

        window = self.data_source.data[-owner.WINDOW_SIZE:]
        centre = len(window) // 2

        mean_time = (window[-1].time - window[0].time) / len(window)
        # NOTE: computed but not used anywhere below.
        frequency_resolution = 1 / (mean_time * owner.WINDOW_SIZE)

        values = [sample.value for sample in window]
        mean_value = np.mean(values)

        spectrum = np.fft.fft(np.array([value - mean_value for value in values]))
        shaped = np.array([
            coefficient * self.transfer_function(
                owner.index_to_freq(bin_index, mean_time))
            for bin_index, coefficient in enumerate(spectrum)
        ])
        restored = np.fft.ifft(shaped)

        self.data.append(
            DataPoint(time=window[centre].time, value=restored[centre].real))
Esempio n. 14
0
def diff_raw_USA_data():
    """Collect raw USA rows that are newer than the last parsed USA row.

    The date of the last line of ``../data/USA_data.csv`` (the text before
    its first comma) is compared with every row after the first of
    ``../fetch/raw_USA.csv``, whose first column holds the date as
    ``YYYYMMDD``. Both paths are relative to this module's directory.

    Returns:
        A list of DataPoint objects for the newer rows, in the reverse of
        their order in the raw file, or None (after printing a message) when
        either file cannot be accessed.

    Raises:
        ValueError: if a date cannot be parsed or a numeric column is not
            an integer.
        IndexError: if a qualifying row has too few columns.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    country_path = base_dir + '/../data/USA_data.csv'
    raw_path = base_dir + '/../fetch/raw_USA.csv'

    try:
        # Only the final line of the parsed file matters; the with-block
        # closes the handle as soon as it has been found.
        with open(country_path) as country_file:
            last_line = ''
            for line in country_file:
                last_line = line
        raw_data_file = open(raw_path, 'r')
    except OSError:
        print('Could not access USA raw/parsed data file.')
        return None

    last_date = last_line[:last_line.find(',')]
    new_data = []
    with raw_data_file:
        for i, line in enumerate(raw_data_file):
            if i == 0:
                # The first line is skipped (presumably a header row).
                continue
            parsed_line = line.split(',')
            raw_date = parsed_line[0]
            curr_date = '-'.join((raw_date[:4], raw_date[4:6], raw_date[6:]))
            if (time.strptime(last_date, '%Y-%m-%d') < time.strptime(
                    curr_date, '%Y-%m-%d')):
                #Index  Data
                #   0   Date
                #   2   Cases
                #   13  Deaths
                #   17  Tests
                #   11  Recovered
                #   6   Hospitalized
                new_data.append(
                    DataPoint(curr_date, int(parsed_line[2]),
                              int(parsed_line[13]), int(parsed_line[17]),
                              int(parsed_line[11]), int(parsed_line[6])))

    # Rows are returned in reverse file order; reversing once at the end
    # avoids repeatedly inserting at the front of the list.
    new_data.reverse()
    return new_data
Esempio n. 15
0
    def update(self):
        """Append one power value for every unprocessed source sample.

        With ``window_size`` of None the window grows from the first sample up
        to the current one; otherwise it is the last ``window_size`` samples,
        and positions without a full window are skipped. A window whose
        processing raises ValueError is silently skipped.
        """
        start_index = self.get_start_index()
        source = self.data_source.data
        total = len(source)

        if start_index == total:
            return

        # ``end`` is the exclusive end of the window, i.e. sample index + 1.
        for end in range(start_index + 1, total + 1):
            if self.window_size is None:
                window = source[:end]
            elif end >= self.window_size:
                window = source[end - self.window_size:end]
            else:
                continue

            try:
                stamp = window[-1].time
                power = self.power(self.interpolate(window))
                self.data.append(DataPoint(time=stamp, value=power))
            except ValueError:
                # Best effort: a window that cannot be processed yields no point.
                continue
Esempio n. 16
0
    def getCellStates (filename, coords):
        """Collect the data points of the cell at ``coords`` from a log file.

        Lines recognised by ``Parse.isTime`` set the current time; lines
        matching ``coords`` are turned into a data point for that time and
        kept unless an equal one is already present. When a new time line
        arrives and the last stored point is not from the time just ended, a
        point copying the previous concentration is added for it first.
        """
        states = []
        current_time = 0
        with open(filename, "r") as handle:
            for line in handle:
                if Parse.isTime(line):
                    # No cell seen for the time that just ended: carry the
                    # previous concentration forward.
                    gap = (current_time > 0 and len(states) > 0
                           and states[-1].getTime() != current_time)
                    if gap:
                        states.append(
                            DataPoint(current_time,
                                      states[-1].getConcentration()))
                    current_time = int(line)
                    continue

                if not Parse.matchesCoords(line, coords):
                    continue

                candidate = Parse.getDataPoint(current_time, line)
                if candidate not in states:
                    states.append(candidate)
        return states
Esempio n. 17
0
def diff_raw_PER_data():
    """Collect raw PER rows that are newer than the last parsed PER row.

    The date of the last line of ``../data/PER_data.csv`` (the text before
    its first comma) is compared with every row after the first of
    ``../fetch/raw_PER.csv``, whose first column holds the date as
    ``YYYY-MM-DD``. Both paths are relative to this module's directory.

    Returns:
        A list of DataPoint objects for the newer rows, in raw-file order,
        or None (after printing a message) when either file cannot be
        accessed.

    Raises:
        ValueError: if a date cannot be parsed or a numeric column is not
            an integer.
        IndexError: if a qualifying row has too few columns.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    country_path = base_dir + '/../data/PER_data.csv'
    raw_path = base_dir + '/../fetch/raw_PER.csv'

    try:
        # Only the final line of the parsed file matters; the with-block
        # closes the handle as soon as it has been found.
        with open(country_path) as country_file:
            last_line = ''
            for line in country_file:
                last_line = line
        raw_data_file = open(raw_path, 'r')
    except OSError:
        print('Could not access PER raw/parsed data file.')
        return None

    last_date = last_line[:last_line.find(',')]
    new_data = []
    with raw_data_file:
        for i, line in enumerate(raw_data_file):
            if i == 0:
                # The first line is skipped (presumably a header row).
                continue
            parsed_line = line.split(',')
            if (time.strptime(last_date, '%Y-%m-%d') < time.strptime(
                    str(parsed_line[0]), '%Y-%m-%d')):
                #Index  Data
                #   0   Date
                #   1   Cases
                #   2   Deaths
                #   3   Tests
                #   4   Recovered
                #   5   Hospitalized
                new_data.append(
                    DataPoint(parsed_line[0], int(parsed_line[1]),
                              int(parsed_line[2]), int(parsed_line[3]),
                              int(parsed_line[4]), int(parsed_line[5])))
    return new_data
#! /usr/bin/env python

import sys
from DataPoint import DataPoint
# Opened for read/write (truncating any existing content); it is not written
# to in the portion of the script visible here.
wfile = open("map3.txt","w+")

# Running state for the group of records that share the current key.
prevKey = None
count = 0
yearSum = 0
tempSum = 0
avgYear = 0
avgTemp = 0

# Each input line is "<key>\t<value>"; both halves are parsed into DataPoint objects.
for line in sys.stdin:
	(key , value) = line.split("\t")
	kCentroid = DataPoint(key.strip())
	dataPoints = DataPoint(value.strip())

	#print("K Centroid: " + kCentroid.toString() + "\t Data Point: " + dataPoints.toString())


	# The very first record defines the key being accumulated.
	if prevKey is None:
		prevKey = kCentroid
		
		#print prevKey.toString()
		#print kCentroid.toString()

	# Same key as the tracked one: accumulate the sums and the record count.
	if prevKey == kCentroid:
		yearSum = yearSum + dataPoints.year
		tempSum = tempSum + dataPoints.temp
		count = count + 1
Esempio n. 19
0
#! /usr/bin/env python

import sys
from DataPoint import DataPoint

canopyCenters = []

# Each input line is "<key>\t<value>"; only the value is parsed into a point.
for line in sys.stdin:
    _, value = line.split("\t")
    dp = DataPoint(value.strip())

    # A point becomes a new canopy center unless checkT2 reports a match
    # against one of the existing centers (the first point always qualifies).
    if not any(dp.checkT2(center) for center in canopyCenters):
        canopyCenters.append(dp)

# Emit every center under the constant key "1".
for canopyCenter in canopyCenters:
    print("1\t" + canopyCenter.toString())
Esempio n. 20
0
def processVideo(dp: DataPoint, vehicleDetector, laneLineDetector,
                 progressTracker, stopEvent):
    """Run detection, tracking and label generation over every frame of a video.

    For each frame index the vehicle detector, the vehicle tracker and the
    lane-line detector are invoked, the label generator is fed their output,
    ``dp`` is updated through ``_updateDataPoint`` and progress is reported
    to ``progressTracker``.

    Args:
        dp: Data point whose ``videoPath`` names the video to process; it is
            updated in place.
        vehicleDetector: Provides ``getFeatures(frame)``, ``wantsRGB`` and
            ``loadFeaturesFromDisk(path)``.
        laneLineDetector: Provides ``getLines(frame)`` and
            ``loadFeaturesFromDisk(path)``.
        progressTracker: Receives ``setCurVidProgress`` and
            ``incrementNumFramesProcessed`` calls once per frame.
        stopEvent: Checked before every frame; when set, processing stops.

    Returns:
        ``dp``. After a full run its ``predictedLabels`` are set and
        ``hasBeenProcessed`` is True; after an early stop neither is set here.
    """

    video = Video(dp.videoPath)
    totalNumFrames = video.getTotalNumFrames()

    # Features live next to the video: 'videos' -> 'features', '.avi' -> '.pkl'.
    videoFeaturesPath = dp.videoPath.replace('videos', 'features').replace(
        '.avi', '.pkl')

    if CONFIG.USE_PRECOMPUTED_FEATURES:
        vehicleDetector.loadFeaturesFromDisk(videoFeaturesPath)
        laneLineDetector.loadFeaturesFromDisk(videoFeaturesPath)

    tracker = VehicleTracker()
    labelGen = LabelGenerator(video.getFps())

    # Per-frame results are collected only when they will be written to disk.
    if CONFIG.MAKE_PRECOMPUTED_FEATURES:
        allboxes, allboxscores, allvehicles, alllines = [], [], [], []

    # NOTE: `frames` is never read or appended to in this function.
    frames = []
    for frameIndex in range(totalNumFrames):

        # Cooperative cancellation: return the partially updated data point.
        if stopEvent.is_set():
            print("Classifier process exited.", flush=True)
            return dp

        if CONFIG.SHOULD_LOAD_VID_FROM_DISK:
            isFrameAvail, frame = video.getFrame(vehicleDetector.wantsRGB)
        else:
            # No frame is decoded; the detectors are called with frame=None.
            isFrameAvail, frame = True, None

        if not isFrameAvail:
            print('Video=' + dp.videoPath + ' returned no frame for index=' +
                  str(frameIndex) + ' but totalNumFrames=' +
                  str(totalNumFrames))
            # Use empty results for this frame; the label generator is not
            # called for it.
            rawboxes, boxscores, vehicles, lines = [], [], [], [[], []]
        else:

            rawboxes, boxscores = vehicleDetector.getFeatures(frame)
            vehicles = tracker.getVehicles(frame, rawboxes, boxscores)
            lines = laneLineDetector.getLines(frame)
            try:
                labelGen.processFrame(vehicles, lines, frameIndex)
            except Exception as e:
                # Label-generation errors are printed and otherwise ignored.
                print(e)

        if CONFIG.MAKE_PRECOMPUTED_FEATURES:
            allboxes.append(rawboxes)
            allboxscores.append(boxscores)
            allvehicles.append(vehicles)
            alllines.append(lines)

        _updateDataPoint(dp, rawboxes, vehicles, lines)
        # Reported before the increment, so the last value is (N - 1) / N.
        progressTracker.setCurVidProgress(frameIndex / totalNumFrames)
        progressTracker.incrementNumFramesProcessed()

    if CONFIG.MAKE_PRECOMPUTED_FEATURES:
        import pickle
        with open(videoFeaturesPath, 'wb') as file:
            # Dump order is boxes, scores, lines, vehicles.
            pickle.dump([allboxes, allboxscores, alllines, allvehicles], file)

    dp.predictedLabels = labelGen.getLabels()
    dp.hasBeenProcessed = True
    return dp
	for kCentroid in kCentroids:
		if (canopyCenter.checkT1(kCentroid)):
			#kCentroidsList.append(kCentroid.toString())
			kCentroidsList.append(kCentroid)
	if len(kCentroidsList)>0:
		#canopyCenterKCentroidsDict = {canopyCenter.toString():kCentroidsList}
		canopyCenterKCentroidsDict[canopyCenter] = kCentroidsList


# Each input line is "<canopy center>\t<data point>"; for canopy centers that
# have candidate k-centroids, the distance to each candidate is computed.
for line in sys.stdin:
	(key,value) = line.strip().split("\t")
	#key = canopy center
	#value = data point

	canopyCenter = DataPoint(key)
	dataPoint = DataPoint(value)

	#print ("P1>\tcanopyCenter: " + canopyCenter.toString() + "\t dataPoint: " + dataPoint.toString())


	# Dictionary lookup by DataPoint; presumably DataPoint defines hashing and
	# equality by value - confirm in the DataPoint module.
	if canopyCenter in canopyCenterKCentroidsDict:
		#print ("canopyCenter : " + canopyCenter.toString() + "\t" + "kCentroidsList : " + "\t dataPoint : " + dataPoint.toString())
		kCentroidsList = canopyCenterKCentroidsDict[canopyCenter]	
		# Nothing to compare against for this canopy center.
		if len(kCentroidsList) < 1 :
			continue	
		# Start with the first candidate as the provisional minimum.
		minDistance = dataPoint.complexDistance(kCentroidsList[0])
		#print("Initial minDistance : " + str(minDistance) + "\tkCentroidsList[0] : " + kCentroidsList[0].toString())	
		pos = 0
		for i in range (1 , len(kCentroidsList)):
			currentDistance = dataPoint.complexDistance(kCentroidsList[i])
		#	print("currentDistance : " + str(currentDistance) + "\tkCentroidsList[i]" + kCentroidsList[i].toString())
Esempio n. 22
0
 def populate_list(self):
     """Append ``self.size`` new ``DataPoint(self.min, self.max)`` objects to ``self.my_list``."""
     for _ in range(self.size):
         point = DataPoint(self.min, self.max)
         self.my_list.append(point)
Esempio n. 23
0
 def getDataPoint (time, line):
     """Build a DataPoint for ``time`` from the last ``<...>`` group in ``line``.

     The text between the last '<' and the last '>' is split on commas, every
     element is converted to int, and the second one (index 1) becomes the
     DataPoint's second argument.
     """
     start = line.rfind("<") + 1
     end = line.rfind(">")
     fields = [int(element) for element in line[start:end].split(",")]
     return DataPoint(time, fields[1])
Esempio n. 24
0
    def read_file(self):
        """
        Read the CSV data file and store the data points it holds.

        The first line is treated as a header and skipped. Every following
        line is split on commas into entry number, date (day/month/year), time
        (hour:minute:second), temperature, humidity and staff code. A DataPoint
        is built from them and its ``List_format()`` result is appended to
        ``self.dataPointsList``.

        :return: nothing
        """

        # Clear down any existing entries
        # NOTE(review): rows are appended through ``self.dataPointsList`` below;
        # presumably that is a property exposing this private list - confirm.
        self.__dataPointsList = []
        self.__dataLogList = []

        scaleTemp = "C"  # temperature measuring scale
        scaleHumidity = "%"  # humidity measuring scale

        # The with-block closes the file even when a row fails to parse
        with open(self.filePath, "r") as csv_file:

            # Skip the header row (does nothing for an empty file)
            next(csv_file, None)

            # Read each remaining line from the CSV data file
            for line in csv_file:

                # Values are accessed by position, so the column order of the
                # CSV data file is fixed
                row = line.split(",")

                # Store the values of the current row
                entriNo = row[0]
                date = row[1]
                timeStamp = row[2]
                temperature = row[3]
                humidity = row[4]
                # NOTE: if this is the last column it still carries the
                # line's trailing newline
                staff_code = row[5]  # Added on May-2019

                # Dates look like 5/3/2019 or 10/10/2019 and times like 9:1:1
                # or 11:11:11, i.e. without leading zeros
                splitDateValue = date.split("/")
                splitTimeValue = timeStamp.split(":")

                # Going through a datetime object renders the time zero-padded
                # (01:01:00 instead of 1:1:0)
                date_time_object = datetime(int(splitDateValue[2]),
                                            int(splitDateValue[1]),
                                            int(splitDateValue[0]),
                                            int(splitTimeValue[0]),
                                            int(splitTimeValue[1]),
                                            int(splitTimeValue[2]))
                timeStamp = date_time_object.time()

                # Wrap the readings in their measurement objects
                tempData = Temperature(float(temperature), scaleTemp)
                humidityData = Humidity(float(humidity), scaleHumidity)

                # Create a data point object from the row's values
                data_point = DataPoint(entriNo, date, str(timeStamp),
                                       tempData.value, humidityData.value,
                                       staff_code)

                # Add the data point created to the list of data points
                self.dataPointsList.append(data_point.List_format())
Esempio n. 25
0
 def transformation(cls, signal):
     """Return a DataPoint holding the mean value of ``signal``, stamped with its last sample's time."""
     total = sum(sample.value for sample in signal)
     mean_value = total / len(signal)
     return DataPoint(time=signal[-1].time, value=mean_value)
Esempio n. 26
0
# Load the old and the new centroids from the two files named on the command
# line into parallel lists.

# Both file names are required.
if len(sys.argv)<3:
	print "Error: Insufficient Arguments"
	sys.exit(-1)

oldCentroidsFile = hdfs.open(sys.argv[1])
newCentroidsFile = hdfs.open(sys.argv[2])

oldCentroids = []
newCentroids = []

# Old centroid lines may be either "<key>\t<value>" or a bare value.
for line in oldCentroidsFile:
	if line.find("\t") != -1:
		(key,value) = line.strip().split("\t")
		oldCentroid = DataPoint(value)
	else:
		oldCentroid = DataPoint(line.strip()) 
	oldCentroids.append(oldCentroid)

# New centroid lines are always "<key>\t<value>".
for line in newCentroidsFile:
	(key,value) = line.strip().split("\t")
	newCentroids.append(DataPoint(value))


# Compare every element with the corresponding element in the other list using
# the complex distance of the data points.
# NOTE: pairs are matched by index, so newCentroids must hold at least as many
# entries as oldCentroids.

for i in range(len(oldCentroids)):
	#print("Distance between " + oldCentroids[i].toString() 
	#	+ " and " + newCentroids[i].toString() + " : " + str(oldCentroids[i].complexDistance(newCentroids[i])))
	if (oldCentroids[i].complexDistance(newCentroids[i])) > DataPoint.THRESHOLD:
Esempio n. 27
0
#! /usr/bin/env python

import sys
from DataPoint import DataPoint

canopyCenters = []

# Read one data point per line from standard input.
for line in sys.stdin:
    dp = DataPoint(line.strip())

    # A point becomes a new canopy center unless checkT2 reports a match
    # against one of the existing centers. any() over an empty list is False,
    # so the first point always becomes a center.
    if not any(dp.checkT2(center) for center in canopyCenters):
        canopyCenters.append(dp)

# Write every canopy center to standard output under the constant key "1".
for canopyCenter in canopyCenters:
    print("1\t" + canopyCenter.toString())
Esempio n. 28
0
# Input files: canopy centers first, k-centroids second.
canopyCentersFile = hdfs.open(sys.argv[1])
kCentroidsFile = hdfs.open(sys.argv[2])

kCentroids = []
canopyCenters = []
# NOTE: centroidList is not used in the portion of the script visible here.
centroidList = []
canopyCenterKCentroidsDict = {}

### Setup for the mapper in Cluster Centroid Assignment Stage:

## Reading k-centroids (gen.py) and canopyCenters (mapperStg2.py) into lists:

# k-centroid lines may be either "<key>\t<value>" or a bare value.
for line in kCentroidsFile:
    if line.find("\t") != -1:
        (key, value) = line.split("\t")
        kp = DataPoint(value.strip())
    else:
        kp = DataPoint(line.strip())
    kCentroids.append(kp)

# Canopy center lines are always "<key>\t<value>"; only the value is kept.
for line in canopyCentersFile:
    cp = DataPoint(line.strip().split("\t")[1])
    canopyCenters.append(cp)

## Adding the k-centroids and canopyCenters to a dictionary:
# outer loop canopy centers

# For every canopy center, test each k-centroid with checkT1.
for canopyCenter in canopyCenters:
    kCentroidsList = []
    for kCentroid in kCentroids:
        if (canopyCenter.checkT1(kCentroid)):
Esempio n. 29
0
 def changed(self, value):
     """Encode ``value`` with this object's encoding and pass it down to the module as a DataPoint."""
     encoded = encode(self.encoding, value)
     self.module.pass_down(DataPoint(self.id, encoded))
	sys.exit(-1)

centroidsFile = hdfs.open(sys.argv[1])

# Every line is "<key>\t<value>"; only the value describes the centroid.
kCentroids = []
for line in centroidsFile:
    (key, value) = line.strip().split("\t")
    kCentroids.append(DataPoint(value))

centroidsFile.close()

# For every data point on stdin, emit "<nearest centroid>\t<data point>".
for line in sys.stdin:
    dataPoint = DataPoint(line.strip())

    # Linear scan; on ties the earliest centroid wins.
    pos = 0
    minDistance = dataPoint.complexDistance(kCentroids[0])
    for i, candidate in enumerate(kCentroids[1:], 1):
        distance = dataPoint.complexDistance(candidate)
        if distance < minDistance:
            minDistance, pos = distance, i

    print(kCentroids[pos].toString() + "\t" + dataPoint.toString())
Esempio n. 31
0
    def scrapeBoxScore(self, file_path):
        """Parse a box-score HTML file into data points and an inning count.

        Basic info is set from the underscore-separated parts of the file
        name. Every ``<th>`` cell found in a table body becomes a
        ``DataPoint(table_no, text)`` appended to ``self.dataPointList``;
        ``<td>`` cells of the first table are collected and used to derive
        ``self.innings``.

        :param file_path: path of the HTML file to scrape
        :return: self
        :raises ValueError: if a table contains an unexpected tag, either
            directly under ``<table>`` or inside a ``<tr>``
        """

        # BeautifulSoup reads the whole file while building the tree, so the
        # handle can be closed as soon as the soup exists.
        with open(file_path, "r") as html_doc:
            soup = BeautifulSoup(html_doc, 'html.parser')

        # load basic info
        split_path = file_path.split("/")[-1].split("_")
        self.setBasicInfo(split_path)

        inning_scores = []

        # Sections that may appear directly under <table> but hold nothing
        # that is extracted here.
        ignored_sections = ("caption", "thead", "tfoot")

        for table_no, table in enumerate(soup.find_all('table')):
            for section in table.contents:
                if section.name != "tbody":
                    # Nodes without a tag name (plain text) are skipped silently.
                    if section.name and section.name not in ignored_sections:
                        raise ValueError("Missing", section.name)
                    continue

                for row in section:
                    if row.name != "tr":
                        continue

                    for cell in row:
                        if cell.name == "th":
                            # create DataPoint object
                            self.dataPointList.append(
                                DataPoint(table_no, cell.text))
                        elif cell.name == "td":
                            # Only the first table feeds the inning count.
                            if table_no == 0:
                                inning_scores.append(cell.text)
                        elif cell.name:
                            raise ValueError(cell.name)

        self.innings = int(len(inning_scores) / 2 - 3)
        return self
Esempio n. 32
0
 def transformation(cls, signal):
     """Return a derivative-like estimate of ``signal`` centred on its third sample.

     The sampling time is the time span of ``signal`` divided by
     ``cls.WINDOW_SIZE - 1``. The result is
     ``(-v0 - 2*v1 + 2*v3 + v4) / (8 * sampling_time)``, stamped with the time
     of ``signal[2]``. NOTE(review): this looks like a five-point derivative
     filter - confirm against the intended algorithm.
     """
     span = signal[-1].time - signal[0].time
     sampling_time = span / (cls.WINDOW_SIZE - 1)
     weighted = (-signal[0].value - 2 * signal[1].value +
                 2 * signal[3].value + signal[4].value)
     result = weighted / (8 * sampling_time)
     return DataPoint(time=signal[2].time, value=result)