def cleanDataPoints(filename, dataPoints, numSteps):
    """Pad every series in *dataPoints* so each of the numSteps steps has a point.

    A step whose recorded time does not match its index gets a point inserted
    carrying the preceding step's concentration; steps past the end of a
    series are appended with the last known concentration.
    `filename` is unused here but kept for interface compatibility.
    """
    for key in dataPoints:
        series = dataPoints[key]  # alias: mutations hit the dict's list
        for step in range(numSteps):
            if step < len(series):
                # Mid-series gap: back-fill from the previous point.
                if series[step].getTime() != step:
                    series.insert(step, DataPoint(step, series[step - 1].getConcentration()))
            else:
                # Series ended early: extend with the last concentration.
                series.append(DataPoint(step, series[-1].getConcentration()))
    return dataPoints
def __init__(self, X_train, Y_train, k):
    """Store the training set and k, pairing samples into DataPoint objects."""
    self.X_train = X_train
    self.Y_train = Y_train
    self.k = k
    # One DataPoint per (sample, label) pair.
    self.datapoints = [DataPoint(xt, yt) for xt, yt in zip(self.X_train, self.Y_train)]
def update(self):
    """Append one DataPoint per consecutive pair of newly fetched samples.

    Each point is stamped with the later sample's time and holds the time
    delta between the pair.
    """
    fresh = self.get_new_data()
    for earlier, later in zip(fresh, fresh[1:]):
        self.data.append(DataPoint(time=later.time, value=later.time - earlier.time))
def update(self):
    """Band-pass filter the newest WINDOW_SIZE source samples and append the
    slice [START_INDEX, STOP_INDEX) of the filtered window to self.data."""
    # Need a full window of raw samples before filtering can start.
    if len(self.data_source.data) < type(self).WINDOW_SIZE:
        return
    # Already emitted output covering this window position: nothing new to do.
    if len(self.data) and self.data_source.data[
            -type(self).WINDOW_SIZE + type(self).START_INDEX].time <= self.data[-1].time:
        return
    # Split the window into parallel value/time tuples.
    data_samples, time_samples = zip(
        *[(dp.value, dp.time) for dp in self.data_source.data[-type(self).WINDOW_SIZE:]])
    frequency = type(self).calculate_frequency(
        self.data_source.data[-type(self).WINDOW_SIZE:])
    filtered_data = type(self).butter_bandpass_filter(
        data_samples, frequency, self.lowcut, self.highcut,
        self.order_lowcut, self.order_highcut)
    assert len(time_samples) == len(filtered_data)
    # Emit only the interior slice of the window, presumably to avoid filter
    # edge effects — TODO confirm against START_INDEX/STOP_INDEX definitions.
    data_points = [
        DataPoint(time=time_samples[i], value=filtered_data[i])
        for i in range(type(self).START_INDEX, type(self).STOP_INDEX)
    ]
    self.data += data_points
def update(self):
    """Square every not-yet-processed source sample and append the results."""
    source = self.data_source.data
    if not source:
        return
    # Everything past len(self.data) is new since the last update.
    for sample in source[len(self.data):]:
        self.data.append(DataPoint(time=sample.time, value=sample.value ** 2))
def processingComplete(self, dataPoint: DataPoint): print('Video', dataPoint.videoPath, 'has completed processing.') # id(oldVid) != id(dataPoint) so changes made to dataPoint in # BehaviorClassifier are not reflected in oldVid. oldVid and # dataPoint are different python objects. dataPoint.saveToStorage( self.storage, shouldSaveFeatures=self.ui.saveFeaturesCheckBox.isChecked()) self.dataPoints[dataPoint.videoPath] = dataPoint self.dialog.updateState(self.dataPoints) self.updateFileTableForDataPointChange() if self.isCurrentVideo(dataPoint): self.setCurrentVideo(dataPoint, play=False)
def generate_data_set(self, length: int):
    """Build `length` random DataPoints whose color shifts green with height.

    Values are drawn from [10, window_height]; the green channel scales with
    value / window_height and red takes the remainder (blue stays 0).
    """
    self.data_set = []
    for _ in range(length):
        height = random.randint(10, self.window_height)
        green = int(height / self.window_height * 255)
        color = RGB(255 - green, green, 0)
        self.data_set.append(DataPoint(height, color))
def create_datapoint_objects(csv_name):
    """Read `csv_name` and return one DataPoint per row (first 7 columns)."""
    with open(csv_name) as csv_file:
        reader = csv.reader(csv_file, delimiter=',')
        return [
            DataPoint(row[0], row[1], row[2], row[3], row[4], row[5], row[6])
            for row in reader
        ]
def loadVideosFromFolder(self, folder):
    """Discover videos under `folder` and register each one as a DataPoint."""
    for path in self.storage.recursivelyFindVideosInFolder(folder):
        # Do not load videos that have no precomputed boxes while in
        # USE_PRECOMPUTED_FEATURES mode.
        if CONFIG.USE_PRECOMPUTED_FEATURES:
            featuresPath = path.replace('videos', 'features').replace('.avi', '.pkl')
            if not self.storage.fileExists(featuresPath):
                continue
        point = DataPoint(path, self.storage, self.labelsSaveFolder)
        self.dataPoints[point.videoPath] = point
        self.addToVideoList(point)
def add_data(self, time, value):
    """Record one raw sample after warm-up, rebasing time and range-checking value.

    `time` is divided by 1e6 (presumably microseconds to seconds — TODO
    confirm upstream units) and rebased to the first accepted sample.
    Values outside [MIN_VALUE, MAX_VALUE] are dropped silently.
    """
    # Discard the first WARMUP_STEPS samples entirely.
    if self.warmup_counter < RawData.WARMUP_STEPS:
        self.warmup_counter += 1
        return
    seconds = int(time) / 1000000
    reading = int(value)
    if self.start_time is None:
        self.start_time = seconds
    seconds -= self.start_time
    if RawData.MIN_VALUE <= reading <= RawData.MAX_VALUE:
        self.data.append(DataPoint(time=seconds, value=reading))
def update(self):
    """Emit a heart-rate DataPoint for every R-peak newer than our last output."""
    peaks = self.data_source.data
    # Already caught up with the source: nothing to do.
    if len(self.data) and self.data[-1].time == peaks[-1].time:
        return
    last_time = self.data[-1].time if len(self.data) else 0
    for index, r_peak in enumerate(peaks):
        if r_peak.time <= last_time:
            continue
        # Rate is computed over at most the preceding number_of_peaks peaks.
        if index > self.number_of_peaks:
            start = index - self.number_of_peaks
        else:
            start = 0
        self.data.append(
            DataPoint(time=r_peak.time,
                      value=HeartRate.calculate_heart_rate(peaks[start:index])))
        last_time = r_peak.time
def update(self):
    """Append an RMSSD DataPoint for each new sample's window.

    With window_size None the window grows from the start of the source;
    otherwise a fixed-size sliding window is used once enough samples exist.
    """
    source = self.data_source.data
    begin = self.get_start_index()
    if begin == len(source):
        return
    for i in range(begin, len(source)):
        if self.window_size is None:
            window = source[:i + 1]
        elif i >= self.window_size - 1:
            window = source[i - self.window_size + 1:i + 1]
        else:
            # Not enough samples yet to fill a fixed-size window.
            continue
        values = [point.value for point in window]
        self.data.append(DataPoint(time=window[-1].time, value=type(self).rmssd(values)))
def update(self):
    """Frequency-shape the newest WINDOW_SIZE samples and append the real
    midpoint of the inverse transform as a single DataPoint."""
    # Need a full window before the FFT-based filter can run.
    if len(self.data_source.data) < type(self).WINDOW_SIZE:
        return
    filtered_data = self.data_source.data[-type(self).WINDOW_SIZE:]
    # Midpoint of the window — the one sample emitted this update.
    index_base = int(len(filtered_data)/2)
    # Average sampling interval across the window.
    mean_time = (filtered_data[-1].time - filtered_data[0].time)/len(filtered_data)
    # NOTE(review): frequency_resolution is computed but never used here.
    frequency_resolution = 1/(mean_time * type(self).WINDOW_SIZE)
    # Remove the DC component before transforming.
    mean_value = np.mean([data.value for data in filtered_data])
    fft = np.fft.fft(np.array([data.value - mean_value for data in filtered_data]))
    # Weight each bin by the transfer function at that bin's frequency.
    fft = np.array([value * self.transfer_function(type(self).index_to_freq(index, mean_time))
                    for index, value in enumerate(fft)])
    ifft = np.fft.ifft(fft)
    self.data.append(DataPoint(time=filtered_data[index_base].time,
                               value=ifft[index_base].real))
def diff_raw_USA_data():
    """Return DataPoints for USA rows newer than the last parsed entry.

    Reads the most recent date already present in data/USA_data.csv, then
    scans fetch/raw_USA.csv and collects every row dated after it. Each
    point is inserted at the front, reversing the raw file's order —
    presumably the raw file lists newest rows first (verify upstream).
    Returns None if either file is inaccessible.

    Fixes over the original: files are opened once inside ``with`` blocks
    (the originals were opened twice and never closed) and the bare
    ``except`` is narrowed to OSError so parse errors still surface.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    raw_path = base_dir + '/../fetch/raw_USA.csv'
    parsed_path = base_dir + '/../data/USA_data.csv'
    try:
        with open(parsed_path) as country_file, open(raw_path, 'r') as raw_data_file:
            # The last line of the parsed file holds the most recent date.
            last_line = ''
            for line in country_file:
                last_line = line
            last_date = last_line[:last_line.find(',')]
            new_data = []
            for i, line in enumerate(raw_data_file):
                if i == 0:  # header row
                    continue
                parsed_line = line.split(',')
                # Raw dates are YYYYMMDD; normalize to YYYY-MM-DD.
                curr_date = (str(parsed_line[0][:4]) + '-' + str(parsed_line[0][4:6])
                             + '-' + str(parsed_line[0][6:]))
                if (time.strptime(last_date, '%Y-%m-%d')
                        < time.strptime(curr_date, '%Y-%m-%d')):
                    # Raw column map: 2=cases, 13=deaths, 17=tests,
                    # 11=recovered, 6=hospitalized.
                    new_data.insert(
                        0,
                        DataPoint(curr_date, int(parsed_line[2]),
                                  int(parsed_line[13]), int(parsed_line[17]),
                                  int(parsed_line[11]), int(parsed_line[6])))
            return new_data
    except OSError:
        # Preserve the original best-effort contract: report and bail out.
        print('Could not access USA raw/parsed data file.')
        return None
def update(self):
    """Append spectral-power DataPoints for each new window of source data.

    Windows whose interpolation raises ValueError are silently skipped,
    matching the original best-effort behavior.
    """
    source = self.data_source.data
    begin = self.get_start_index()
    if begin == len(source):
        return
    for i in range(begin, len(source)):
        if self.window_size is None:
            window = source[:i + 1]
        elif i >= self.window_size - 1:
            window = source[i - self.window_size + 1:i + 1]
        else:
            continue
        try:
            point = DataPoint(time=window[-1].time,
                              value=self.power(self.interpolate(window)))
        except ValueError:
            # Interpolation can reject a window; move on to the next one.
            continue
        self.data.append(point)
def getCellStates(filename, coords):
    """Parse `filename` into DataPoints for the cell at `coords`.

    Lines recognized by Parse.isTime advance the current time; when a time
    step produced no matching cell line, the previous concentration is
    carried forward so the series stays contiguous.
    """
    points = []
    current_time = 0
    with open(filename, "r") as handle:
        for line in handle:
            if Parse.isTime(line):
                # The step that just ended produced no point: carry the
                # last known concentration forward.
                if current_time > 0 and points and points[-1].getTime() != current_time:
                    points.append(DataPoint(current_time, points[-1].getConcentration()))
                current_time = int(line)
                continue
            if Parse.matchesCoords(line, coords):
                candidate = Parse.getDataPoint(current_time, line)
                if candidate not in points:
                    points.append(candidate)
    return points
def diff_raw_PER_data():
    """Return DataPoints for Peru rows newer than the last parsed entry.

    Reads the most recent date already present in data/PER_data.csv, then
    scans fetch/raw_PER.csv and appends a DataPoint (in file order) for
    every row dated after it. Returns None if either file is inaccessible.

    Fixes over the original: files are opened once inside ``with`` blocks
    (the originals were opened twice and never closed) and the bare
    ``except`` is narrowed to OSError so parse errors still surface.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    raw_path = base_dir + '/../fetch/raw_PER.csv'
    parsed_path = base_dir + '/../data/PER_data.csv'
    try:
        with open(parsed_path) as country_file, open(raw_path, 'r') as raw_data_file:
            # The last line of the parsed file holds the most recent date.
            last_line = ''
            for line in country_file:
                last_line = line
            last_date = last_line[:last_line.find(',')]
            new_data = []
            for i, line in enumerate(raw_data_file):
                if i == 0:  # header row
                    continue
                parsed_line = line.split(',')
                if (time.strptime(last_date, '%Y-%m-%d')
                        < time.strptime(str(parsed_line[0]), '%Y-%m-%d')):
                    # Raw column map: 0=date, 1=cases, 2=deaths, 3=tests,
                    # 4=recovered, 5=hospitalized.
                    new_data.append(
                        DataPoint(parsed_line[0], int(parsed_line[1]),
                                  int(parsed_line[2]), int(parsed_line[3]),
                                  int(parsed_line[4]), int(parsed_line[5])))
            return new_data
    except OSError:
        # Preserve the original best-effort contract: report and bail out.
        print('Could not access PER raw/parsed data file.')
        return None
#! /usr/bin/env python import sys from DataPoint import DataPoint wfile = open("map3.txt","w+") prevKey = None count = 0 yearSum = 0 tempSum = 0 avgYear = 0 avgTemp = 0 for line in sys.stdin: (key , value) = line.split("\t") kCentroid = DataPoint(key.strip()) dataPoints = DataPoint(value.strip()) #print("K Centroid: " + kCentroid.toString() + "\t Data Point: " + dataPoints.toString()) if prevKey is None: prevKey = kCentroid #print prevKey.toString() #print kCentroid.toString() if prevKey == kCentroid: yearSum = yearSum + dataPoints.year tempSum = tempSum + dataPoints.temp count = count + 1
#! /usr/bin/env python
import sys
from DataPoint import DataPoint

# Canopy-selection pass: a point from stdin becomes a new canopy center
# unless it lies within T2 of a center we already kept.
canopyCenters = []
for line in sys.stdin:
    _key, value = line.split("\t")
    candidate = DataPoint(value.strip())
    if not canopyCenters:
        canopyCenters.append(candidate)
        continue
    if not any(candidate.checkT2(center) for center in canopyCenters):
        canopyCenters.append(candidate)

# Emit every surviving center under the single key "1".
for canopyCenter in canopyCenters:
    print("1\t" + canopyCenter.toString())
def processVideo(dp: DataPoint, vehicleDetector, laneLineDetector, progressTracker,
                 stopEvent):
    """Run detection/tracking/labeling over every frame of dp's video.

    Mutates and returns `dp`: per-frame results go through _updateDataPoint,
    final labels land in dp.predictedLabels, and dp.hasBeenProcessed is set.
    Returns early (partial results) if `stopEvent` fires.
    """
    video = Video(dp.videoPath)
    totalNumFrames = video.getTotalNumFrames()
    # Feature files mirror the video tree: videos/x.avi -> features/x.pkl.
    videoFeaturesPath = dp.videoPath.replace('videos', 'features').replace(
        '.avi', '.pkl')
    if CONFIG.USE_PRECOMPUTED_FEATURES:
        vehicleDetector.loadFeaturesFromDisk(videoFeaturesPath)
        laneLineDetector.loadFeaturesFromDisk(videoFeaturesPath)
    tracker = VehicleTracker()
    labelGen = LabelGenerator(video.getFps())
    if CONFIG.MAKE_PRECOMPUTED_FEATURES:
        # Accumulators for writing a feature file at the end.
        allboxes, allboxscores, allvehicles, alllines = [], [], [], []
        frames = []
    for frameIndex in range(totalNumFrames):
        # Cooperative cancellation from the parent process.
        if stopEvent.is_set():
            print("Classifier process exited.", flush=True)
            return dp
        if CONFIG.SHOULD_LOAD_VID_FROM_DISK:
            isFrameAvail, frame = video.getFrame(vehicleDetector.wantsRGB)
        else:
            # Detectors run on precomputed features; no pixel data needed.
            isFrameAvail, frame = True, None
        if not isFrameAvail:
            print('Video=' + dp.videoPath + ' returned no frame for index=' +
                  str(frameIndex) + ' but totalNumFrames=' + str(totalNumFrames))
            rawboxes, boxscores, vehicles, lines = [], [], [], [[], []]
        else:
            rawboxes, boxscores = vehicleDetector.getFeatures(frame)
            vehicles = tracker.getVehicles(frame, rawboxes, boxscores)
            lines = laneLineDetector.getLines(frame)
        # Label generation is best-effort: a bad frame must not kill the run.
        try:
            labelGen.processFrame(vehicles, lines, frameIndex)
        except Exception as e:
            print(e)
        if CONFIG.MAKE_PRECOMPUTED_FEATURES:
            allboxes.append(rawboxes)
            allboxscores.append(boxscores)
            allvehicles.append(vehicles)
            alllines.append(lines)
        _updateDataPoint(dp, rawboxes, vehicles, lines)
        progressTracker.setCurVidProgress(frameIndex / totalNumFrames)
        progressTracker.incrementNumFramesProcessed()
    if CONFIG.MAKE_PRECOMPUTED_FEATURES:
        import pickle
        with open(videoFeaturesPath, 'wb') as file:
            pickle.dump([allboxes, allboxscores, alllines, allvehicles], file)
    dp.predictedLabels = labelGen.getLabels()
    dp.hasBeenProcessed = True
    return dp
for kCentroid in kCentroids: if (canopyCenter.checkT1(kCentroid)): #kCentroidsList.append(kCentroid.toString()) kCentroidsList.append(kCentroid) if len(kCentroidsList)>0: #canopyCenterKCentroidsDict = {canopyCenter.toString():kCentroidsList} canopyCenterKCentroidsDict[canopyCenter] = kCentroidsList for line in sys.stdin: (key,value) = line.strip().split("\t") #key = canopy center #value = data point canopyCenter = DataPoint(key) dataPoint = DataPoint(value) #print ("P1>\tcanopyCenter: " + canopyCenter.toString() + "\t dataPoint: " + dataPoint.toString()) if canopyCenter in canopyCenterKCentroidsDict: #print ("canopyCenter : " + canopyCenter.toString() + "\t" + "kCentroidsList : " + "\t dataPoint : " + dataPoint.toString()) kCentroidsList = canopyCenterKCentroidsDict[canopyCenter] if len(kCentroidsList) < 1 : continue minDistance = dataPoint.complexDistance(kCentroidsList[0]) #print("Initial minDistance : " + str(minDistance) + "\tkCentroidsList[0] : " + kCentroidsList[0].toString()) pos = 0 for i in range (1 , len(kCentroidsList)): currentDistance = dataPoint.complexDistance(kCentroidsList[i]) # print("currentDistance : " + str(currentDistance) + "\tkCentroidsList[i]" + kCentroidsList[i].toString())
def populate_list(self):
    """Append `size` DataPoints built from the configured min/max bounds."""
    for _ in range(self.size):
        self.my_list.append(DataPoint(self.min, self.max))
def getDataPoint(time, line):
    """Build a DataPoint from the second integer inside the line's last <...> group."""
    inner = line[line.rfind("<") + 1:line.rfind(">")]
    # Convert every field so malformed lines fail loudly, then keep field 1.
    values = [int(element) for element in inner.split(",")]
    return DataPoint(time, values[1])
def read_file(self):
    """
    Read the CSV data file and populate the data points list with one
    entry per data row; the header row is skipped.

    Fix over the original: the file is opened in a ``with`` block so the
    handle is closed even on error (it was never closed before).

    :return: nothing
    """
    # Clear down any existing entries
    self.__dataPointsList = []
    self.__dataLogList = []
    with open(self.filePath, "r") as data_file:
        header_seen = False
        for line in data_file:
            if not header_seen:
                # First row is the column header; skip it.
                header_seen = True
                continue
            # Columns: entry no, date, time, temperature, humidity, staff code.
            row = line.split(",")
            entriNo = str(row[0])
            date = str(row[1])
            timeStamp = str(row[2])
            temperature = str(row[3])
            humidity = str(row[4])
            staff_code = str(row[5])  # Added on May-2019
            # Rebuild a datetime so single-digit parts gain leading zeros,
            # e.g. 5/3/2019 and 9:1:1 -> 09:01:01.
            splitDateValue = date.split("/")       # D/M/YYYY
            splitTimeValue = timeStamp.split(":")  # H:M:S
            date_time_object = datetime(int(splitDateValue[2]),
                                        int(splitDateValue[1]),
                                        int(splitDateValue[0]),
                                        int(splitTimeValue[0]),
                                        int(splitTimeValue[1]),
                                        int(splitTimeValue[2]))
            timeStamp = date_time_object.time()
            scaleTemp = "C"       # temperature measuring scale
            scaleHumidity = "%"   # humidity measuring scale
            # Wrap raw readings in their measurement objects.
            tempData = Temperature(float(temperature), scaleTemp)
            humidityData = Humidity(float(humidity), scaleHumidity)
            # Create a datapoint object and pass the necessary attributes
            data_point = DataPoint(entriNo, str(date), str(timeStamp),
                                   tempData.value, humidityData.value,
                                   staff_code)
            # Add the data point created into a list with data points
            self.dataPointsList.append(data_point.List_format())
def transformation(cls, signal):
    """Return a DataPoint holding the window's mean value, stamped with its last time."""
    mean_value = sum(point.value for point in signal) / len(signal)
    return DataPoint(time=signal[-1].time, value=mean_value)
# put em in lists if len(sys.argv)<3: print "Error: Insufficient Arguments" sys.exit(-1) oldCentroidsFile = hdfs.open(sys.argv[1]) newCentroidsFile = hdfs.open(sys.argv[2]) oldCentroids = [] newCentroids = [] for line in oldCentroidsFile: if line.find("\t") != -1: (key,value) = line.strip().split("\t") oldCentroid = DataPoint(value) else: oldCentroid = DataPoint(line.strip()) oldCentroids.append(oldCentroid) for line in newCentroidsFile: (key,value) = line.strip().split("\t") newCentroids.append(DataPoint(value)) # compare every element with coressponding element in the other list using complex distance of the data points. for i in range(len(oldCentroids)): #print("Distance between " + oldCentroids[i].toString() # + " and " + newCentroids[i].toString() + " : " + str(oldCentroids[i].complexDistance(newCentroids[i]))) if (oldCentroids[i].complexDistance(newCentroids[i])) > DataPoint.THRESHOLD:
#! /usr/bin/env python
import sys
from DataPoint import DataPoint

# Canopy-selection pass over raw points: keep a point as a new canopy
# center unless it lies within T2 of a center we already kept.
canopyCenters = []

# taking data from the std input
for line in sys.stdin:
    point = DataPoint(line.strip())
    if not canopyCenters:
        canopyCenters.append(point)
        continue
    if not any(point.checkT2(center) for center in canopyCenters):
        canopyCenters.append(point)

# printing data std output
for canopyCenter in canopyCenters:
    print("1\t" + canopyCenter.toString())
canopyCentersFile = hdfs.open(sys.argv[1]) kCentroidsFile = hdfs.open(sys.argv[2]) kCentroids = [] canopyCenters = [] centroidList = [] canopyCenterKCentroidsDict = {} ### Setup for the mapper in Cluster Centroid Assignment Stage: ## Reading k-centroids (gen.py) and canopyCenters (mapperStg2.py) into lists: for line in kCentroidsFile: if line.find("\t") != -1: (key, value) = line.split("\t") kp = DataPoint(value.strip()) else: kp = DataPoint(line.strip()) kCentroids.append(kp) for line in canopyCentersFile: cp = DataPoint(line.strip().split("\t")[1]) canopyCenters.append(cp) ## Adding the k-centroids and canopyCenters to a dictionary: # outer loop canopy centers for canopyCenter in canopyCenters: kCentroidsList = [] for kCentroid in kCentroids: if (canopyCenter.checkT1(kCentroid)):
def changed(self, value):
    """Encode the new value and forward it down through the owning module."""
    payload = encode(self.encoding, value)
    self.module.pass_down(DataPoint(self.id, payload))
sys.exit(-1) file = hdfs.open(sys.argv[1]) kCentroids = [] for line in file: (key, value) = line.strip().split("\t") kCentroid = DataPoint(value) kCentroids.append(kCentroid) file.close() """ for dataPoint in dataPoints: print dataPoint.toString() """ for line in sys.stdin: dataPoint = DataPoint(line.strip()) minDistance = dataPoint.complexDistance(kCentroids[0]) pos = 0 for i in range (1, len(kCentroids)): distance = dataPoint.complexDistance(kCentroids[i]) if distance < minDistance: minDistance = distance pos = i print(kCentroids[pos].toString()+"\t"+dataPoint.toString())
def scrapeBoxScore(self, file_path):
    """Parse a box-score HTML file into DataPoints and set self.innings.

    Table header cells (<th>) become DataPoints keyed by table number;
    <td> cells of the first table are collected as inning scores. Most
    caption/thead/tfoot branches are deliberately ignored.
    NOTE(review): html_doc is opened but never closed — consider a
    ``with`` block. Returns self for chaining.
    """
    # initialize beautiful soup variable
    html_doc = open(file_path, "r")
    soup = BeautifulSoup(html_doc, 'html.parser')
    # load basic info — the filename itself encodes it, underscore-separated.
    split_path = file_path.split("/")[-1].split("_")
    self.setBasicInfo(split_path)
    table_arr = soup.find_all('table')
    inning_scores = []
    table_no = 0
    for table in table_arr:
        for child1 in table.contents:
            if child1.name == "tbody":
                for child2 in child1:
                    if child2.name == "tr":
                        for child3 in child2:
                            if child3.name == "th":
                                # create DataPoint object
                                temp_data_point = DataPoint(
                                    table_no, child3.text)
                                self.dataPointList.append(temp_data_point)
                            elif child3.name == "td":
                                # Only table 0 carries per-inning scores.
                                if table_no == 0:
                                    inning_scores.append(child3.text)
                                pass
                            elif child3.name:
                                # Unexpected cell tag: fail loudly.
                                raise ValueError(child3.name)
                    elif child2.name:
                        pass
            elif child1.name == "caption":
                # Known captions are all intentionally skipped.
                if child1.text == "Team Score By Innings":
                    pass
                elif child1.text == "Scoring Summary":
                    pass
                elif "Top" in child1.text:
                    pass
                elif "Bottom" in child1.text:
                    pass
                elif "Composite Stats" in child1.text:
                    pass
                elif "Pitching Stats" in child1.text:
                    pass
                else:
                    pass
            elif child1.name == "thead":
                pass
            elif child1.name == "tfoot":
                pass
            elif child1.name:
                # Unexpected table child: fail loudly.
                raise ValueError("Missing", child1.name)
        table_no += 1
    # Two columns per inning; the trailing 3 entries (R/H/E per side,
    # presumably — TODO confirm) are excluded from the inning count.
    self.innings = int(len(inning_scores) / 2 - 3)
    return self
def transformation(cls, signal):
    """Five-point derivative estimate, stamped at the window's center sample.

    Applies the stencil (-f0 - 2*f1 + 2*f3 + f4) / (8*h) where h is the
    average sampling interval across the WINDOW_SIZE-sample window.
    """
    step = (signal[-1].time - signal[0].time) / (cls.WINDOW_SIZE - 1)
    weighted = (-signal[0].value
                - 2 * signal[1].value
                + 2 * signal[3].value
                + signal[4].value)
    return DataPoint(time=signal[2].time, value=weighted / (8 * step))