class EventDataFrame(DummyModule):
    """Dummy analysis module that accumulates per-event records into a DataFrame.

    Lifecycle: ``beginRun`` allocates the frame, ``processEvent`` feeds it one
    event record at a time, and ``endRun`` persists it.
    """

    # In the constructor, provide whatever arguments
    # you intend to play with
    def __init__(self, name):
        DummyModule.__init__(self, name)
        # Placeholder until beginRun() builds the real DataFrame.
        # (Was `object` — the builtin class itself — which is a misleading
        # sentinel; None is the conventional "not yet initialised" marker.)
        self.df = None

    def beginRun(self, runNumber, runRecord):
        """Allocate a fresh DataFrame for this run."""
        self.df = DataFrame(runNumber)

    def printSpecificData(self, item, eventNumber, eventRecord):
        """Print a single key/value pair from the event record."""
        print(eventNumber, "Key : {} , Value : {}".format(item, eventRecord[item]))

    def printRawDataOutput(self, eventNumber, eventRecord):
        """Print every key/value pair in the event record."""
        for item in eventRecord:
            print(eventNumber, "Key : {} , Value : {}".format(item, eventRecord[item]))

    def processEvent(self, runNumber, eventNumber, eventRecord):
        """Feed one event record into the run's DataFrame."""
        self.df.updateDataFrame(eventRecord, eventNumber)
        # print("Processing the DF....")
        # self.df.showDataFrame()

    def endRun(self, runNumber, runRecord):
        """Persist the accumulated DataFrame at the end of the run."""
        self.df.saveDataFrame()
def test_mean_add(self):
    """Within each tx == 2*x group, mean(x + x) must equal the group key tx."""
    x = scipy.random.randint(0, 10, 100)
    # (Removed the unused local `twox = x * 2` — dead code.)
    df = DataFrame(x=x, tx=2 * x)
    glob = df.scope
    s = df.group_by('tx').summarize(txo=mean(glob['x'] + glob['x']))
    self.assertAllClose(s.txo, s.tx)
def setUp(self):
    """Build a fresh DataFrame from the shared test_data fixture before each test."""
    self.data_frame = DataFrame(labels=test_data[0], data=test_data[1:])
    # Expected labels and rows, kept separately for direct comparison in tests.
    self.labels = [
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'type'
    ]
    self.data = test_data[1:]
    # (Removed a dead trailing `pass` statement.)
def on_load_csv(self, event):
    '''Load data from a user-chosen CSV file into a pandas-style DataFrame object.'''
    dlg = wx.FileDialog(self, 'Select File to Open', '', '', '*.csv', wx.OPEN)
    try:
        if dlg.ShowModal() == wx.ID_OK:
            csv_file_path = dlg.GetPath()
            new_dataFrame = DataFrame()
            new_dataFrame.from_csv(csv_file_path)
            # add_dataFrame returns falsy when the frame is already present,
            # so the history log distinguishes new vs. duplicate loads.
            if self.parent.project.add_dataFrame(new_dataFrame):
                self.parent.history.log_new_dataFrame(new_dataFrame.source)
            else:
                self.parent.history.log_already_exists(new_dataFrame.source)
    finally:
        # wx dialogs are not reliably garbage-collected; destroy explicitly
        # to avoid leaking the native window handle.
        dlg.Destroy()
def processArrivalDataFrame(self, event_time: float, receiver: "Host", _type: str, df = None, origin = None):
    """
    create one of the following arrival event and put it to the GEL event list
    internal DF: df created in this host
        success => schedule sense event to see if the channel is idle to process the df
        failure => put into the buffer
    external DF: df from external host to this host
        success => schedule receive event and then return an ACK latter
        failure => no failure
    ACK DF: ACK from external host
        success => take the next df from the buffer and process it
        failure => no failure
    """
    sender = self
    # NOTE(review): of these placeholders only `failure` (still None) is
    # actually used below, as the failure callback of SuccessTransferEvent.
    success = None
    failure = None
    arrival = None
    if _type == "internal DF":
        """
        Schedule next event
        To create a sense channel event, or put it into the buffer
        """
        # Transmit immediately only when the host is idle, nothing is queued
        # ahead of this frame, and the host is not in a blocking state;
        # otherwise the frame waits in the buffer.
        if self.status == "idle" and len(self.buffer.array)==0 and self.blocking==False:
            self.createSenseChannelEvent(event_time, df, "df, stage 0", df.origin)
        else:
            self.addToBuffer(df)
    elif _type == "external DF":
        """
        create an ack packet, and then create a SenseChannel Event for this ack packet
        """
        ack_time = event_time
        # The ACK mirrors the incoming frame's addressing and ids.
        ack = DataFrame("ack", ack_time, df.sender, df.receiver, df.id, df.origin)
        ack.global_Id = df.global_Id
        ack.size = 64  # fixed ACK frame size — presumably bytes; TODO confirm unit
        self.createSenseChannelEvent(event_time, ack, "ack, stage 0", df.origin)
    elif _type == "ack":
        # if received is ack
        success_time = event_time
        # `failure` is None here: the transfer is recorded as a plain success.
        success_event = SuccessTransferEvent(success_time, df, failure, df.origin)
        self.GEL.addEvent(success_event)
def load_data(): """ This is a custom file parser Used this because pd.read_csv() was very, very slow :return: """ # open file and set type of data under `dtype` with open(r"statistics/state_fips_master.csv") as states: state_list = pd.read_csv(states, sep=',', header=0, dtype={ 'state_name': str, 'state_abbr': str, 'long_name': str, 'fips': str, 'sumlev': str, 'region': str, 'division': str, 'state': str, 'region_name': str, 'division_name': str }) # add state name to DataFrame for line in state_list['state_name']: DataFrame.state_list.append(line) # open the data file BYAREA.txt in read mode with open(DataFrame.data_file, mode='r') as readcsv: index = 0 # for each line in the file for line in readcsv: # skip the first line (contains headers) if index == 0: index = index + 1 else: # split the line by pipe `|` symbol into list class_instance = line.rstrip("\n").split("|") # take each index in list and load it into a DataFrame # append this DataFrame into class_array DataFrame.class_instances.append( DataFrame( class_instance[0], # AREA class_instance[1], # AGE_ADJUSTED_CI_LOWER class_instance[2], # AGE_ADJUSTED_CI_UPPER class_instance[3], # AGE_ADJUSTED_RATE class_instance[4], # COUNT class_instance[5], # EVENT_TYPE float(class_instance[6]), # POPULATION class_instance[7], # RACE class_instance[8], # SEX class_instance[9], # SITE class_instance[10], # YEAR class_instance[11], # CRUDE_CI_LOWER class_instance[12], # CRUDE_CI_UPPER class_instance[13])) # CRUDE_RATE
def data_indication(destination_address, source_address, l_sdu):
    """Frame each segment of l_sdu and send it to destination_address,
    waiting for a confirmation (ACK) frame after every data frame.

    The segment 'FIM' (end-of-transmission marker) is sent as a bare '#'.
    """
    for segment in l_sdu:
        print('MENSAGEM A SER ENVIADA: ', segment)
        # The frame delimiter is already inserted into the frame elsewhere.
        # payload length
        length = Conversion.decimal_to_binary(len(segment))
        # sequence field TODO
        # SEQ----ACK
        sequence = '00000000'
        # frame destination address
        destination_address_frame = Conversion.ip_to_binary(
            destination_address)
        # frame source address
        source_address_frame = Conversion.ip_to_binary(source_address)
        # payload converted to binary
        payload = Conversion.string_to_binary(segment)
        # build the frame
        frame = DataFrame(length, sequence, destination_address_frame,
                          source_address_frame, payload)
        # compute the CRC over the frame's CRC-relevant fields
        MyCRC = CRC.crc(frame.return_data_frame_to_crc())
        frame.set__crc(MyCRC)
        # serialise the frame into the string to send
        frame_str = frame.return_data_frame()
        socket_envio = MySocketTCP(destination_address)
        if segment == 'FIM':
            socket_envio.send('#')
        else:
            # Optionally corrupt the frame with noise (disabled):
            # corrompe = random.randrange(0, 2)
            # if corrompe == 1:
            #     print('---houve ruido----')
            #     frame_str = Fisica.add_noise(frame_str)
            # send it
            print('FRAME: ', frame_str)
            socket_envio.send(frame_str)
            print('FRAME ENVIADO')
            # block until the confirmation frame arrives and read its ACK bit
            receive_ack_socket = MySocketTCP()
            got_it = receive_ack_socket.receive()
            OK = ConfirmationFrame.string_to_ConfirmationFrame(got_it)
            ACK = OK.last_bit_sequence()
            print('ACK RECEBIDO: ', ACK)
def train_split_data(dataframe, column, test_size):
    """Split *dataframe* into a test DataFrame and per-class training frames.

    Rows are grouped by the distinct values of *column*; from each group a
    test_size-determined number of rows is drawn at random into the returned
    test DataFrame, leaving copies of the remaining rows as training data.

    Returns (test_dataframe, dict mapping class value -> training DataFrame).
    """
    class_values = dataframe.get_types_of_data(column)
    # One sub-frame per distinct value of the split column.
    by_class = {value: dataframe.get_values_equal_to(column, value)
                for value in class_values}
    # How many rows each class contributes to the test set.
    sample_counts = {value: __get_number_of_samples(test_size, len(frame))
                     for value, frame in by_class.items()}
    # Work on copies so the caller's frames are untouched by the pops below.
    training = {value: copy(frame) for value, frame in by_class.items()}
    test_frame = DataFrame()
    for value, frame in training.items():
        for _ in range(sample_counts[value]):
            picked = random.randrange(0, len(frame))
            test_frame.data.append(frame.pop_on_poss(picked))
    test_frame.labels = dataframe.labels
    return test_frame, training
def data_request():
    """Receive data frames until the '#' terminator arrives, validating each
    frame's CRC and replying with a confirmation frame (ACK=1 on success,
    ACK=0 on CRC mismatch). Returns the reassembled message string.
    """
    mensagem = ''
    while True:
        socket_receber = MySocketTCP('127.0.0.1')
        resposta = socket_receber.receive()
        # '#' is the end-of-transmission marker sent by the peer.
        if resposta == '#':
            print('CLIENTE DESCONECTOU')
            break
        r2 = DataFrame.string_to_DataFrame(resposta)
        r2_payload_bin = r2.get_payload()
        r2_crc_recebido = r2.get_crc()
        # Recompute the CRC locally to validate the received frame.
        crc_novo = CRC.crc(r2.return_data_frame_to_crc())
        # Swap source/destination for the reply.
        source_bin = r2.get_destination_address()
        destination_bin = r2.get_source_address()
        destination = Conversion.binary_to_ip(destination_bin)
        n_ack = 0
        sequence = r2.first_bit_sequence()
        if crc_novo != r2_crc_recebido:
            # CRC mismatch: reply with ACK bit 0, discard the payload.
            sequence += '0000000'
            n_ack = 0
            # send confirmation frame with ack = 0
        else:
            # CRC ok: reply with ACK bit 1 and append the decoded payload.
            sequence += '0000001'
            n_ack = 1
            mensagem += Conversion.binary_to_string(r2_payload_bin)
            print('MENSAGEM RECEBIDA:', Conversion.binary_to_string(r2_payload_bin))
            print('FRAME OK')
        ack_frame = ConfirmationFrame(sequence, destination_bin, source_bin)
        print('ENVIANDO CONFIRMATION FRAME: ', ack_frame.return_confirmation_frame())
        ack_socket = MySocketTCP(destination)
        ack_socket.send(ack_frame.return_confirmation_frame())
        print('CONFIRMATION FRAME ENVIADO COM ACK = ', n_ack)
    return (mensagem)
def get_sample(self):
    """Interactively read one ';'-separated numeric sample from stdin.

    Shows up to five input/decision pairs from the DataFrame as a hint,
    validates that every field is numeric, stores the parsed floats in
    self.sample and returns self for chaining.
    """
    self.dataframe = DataFrame()
    # Pull the input columns and decision class off the freshly built frame.
    self.inputs = self.dataframe.inputs
    self.decision = self.dataframe.decision_class
    try:
        for column, decision in zip(self.inputs[:5], self.decision[:5]):
            print(f'\n{column} => {decision}')
    except IndexError as e:
        print(e)
    answer = input(
        '\nPaste the sample with ";" delimitier among the values: ')
    if ';' not in answer:
        raise ValueError("The input does not contain delimiter.")
    values = answer.lstrip('-+').split(';')
    # Every field must look like a (possibly dotted) non-negative number.
    for value in values:
        if not value.replace('.', '', 1).isdigit():
            raise ValueError("The given value is not a digit.")
    # Drop empty fields, then convert the survivors to floats.
    values = [float(item) for item in values if item]
    print(
        f'Your sample:\n\tinputs: {values[1:]}\n\tdecision_class: {values[0]}'
    )
    self.sample = values
    return self
def makeWindows(self):
    """(Re)build all tool windows: mixture, thermo, kinetics, reaction data,
    reaction paths and dataset frames, then refresh the visible panels."""
    # if self.mixfr:
    # Destroy windows from a previous call; ignore ones that are already gone.
    for w in self.windows:
        try:
            w.destroy()
        except:
            pass
    # NOTE(review): `fr` is assigned but never used below.
    fr = [MixtureFrame, ThermoFrame, TransportFrame]
    self.mixfr = MixtureFrame(self.cwin, self)
    self.thermo = ThermoFrame(self.cwin, self)
    # self.transport = TransportFrame(self.cwin, self)
    self.kinetics = SpeciesKineticsFrame(self.cwin, self)
    self.addWindow('rxndata',ReactionKineticsFrame(self.vrxn, self))
    self.addWindow('rxnpaths',ReactionPathFrame(self))
    self.addWindow('dataset',DataFrame(None, self))
    #self.edit = EditFrame(t, self)
    # NOTE(review): self.transport is referenced here although its assignment
    # above is commented out — it presumably survives from an earlier call or
    # __init__; confirm before enabling/removing the commented line.
    self.windows = [self.mixfr, self.thermo, self.transport, self.kinetics]
    self.showthermo()
    self.showcomp()
    #self.showtransport()
    self.showkinetics()
    #self.showrxns()
    #self.showrpaths()
    #self.showdata()
    # Show the mechanism frame only when a mechanism is loaded.
    if self.mech:
        self.mechframe.grid(row=1,column=0)
    else:
        self.mechframe.grid_forget()
def test_sum_n(self):
    """Group sizes across all (x, y) groups must add back up to the row count."""
    # DataFrame is handed globals() as its scope — presumably this makes the
    # columns resolvable as the bare names `x` and `y` in the group_by call
    # below (they are not defined locally). TODO confirm against DataFrame.
    a = DataFrame(globals(),
                  x=scipy.random.randint(0, 10, 100),
                  y=scipy.random.randint(0, 10, 100))
    s = a.group_by(x, y).summarize(n=n())
    self.assertEqual(s.n.sum(), 100)
def __init__(self, _type, event_time, sender, receiver, gel, origin=None, df=None):
    """Schedule a data-frame arrival of the given _type ("internal DF",
    "external DF" or "ack") by enqueueing a ProcessDataFrameArrivalEvent
    on the global event list (gel)."""
    self.type = _type
    self.event_time = event_time
    self.sender = sender
    self.receiver = receiver
    # Default the origin to the sender when none is given.
    if origin == None:
        self.origin = sender
    else:
        self.origin = origin
    self.GEL = gel
    self.dataframe = df
    if self.type == "internal DF":
        """
        The origin of an internal DF is the sender
        The origin of an external DF is the origin of the df
        The origin of an ack is the origin of the df
        """
        # Internal frames arrive after an exponentially distributed delay.
        arrival_time = event_time + negative_exponential_distribution(
            ARRIVE_RATE)
        df = DataFrame("data", arrival_time, self.sender, self.receiver,
                       self.sender.ackId, origin=self.origin)
        df.global_Id = self.GEL.packet_counter
        self.arrival_time = arrival_time
        self.dataframe = df
        self.GEL.packet_counter += 1
        self.sender.ackId += 1
        # Closure run when the arrival event fires: hand the frame to the host.
        def success():
            self.sender.processArrivalDataFrame(arrival_time, self.receiver,
                                                "internal DF", df, df.origin)
        arrival_Event = ProcessDataFrameArrivalEvent(
            self.type, arrival_time, self.sender, self.receiver, df)
        arrival_Event.success = success
        self.GEL.addEvent(arrival_Event)
    elif self.type == "external DF":
        # External frames arrive immediately at event_time.
        arrival_time = event_time
        self.arrival_time = arrival_time
        self.dataframe = df
        def success():
            self.sender.processArrivalDataFrame(arrival_time, self.receiver,
                                                "external DF", df, df.origin)
        arrival_Event = ProcessDataFrameArrivalEvent(
            self.type, arrival_time, self.sender, self.receiver, df)
        arrival_Event.success = success
        self.GEL.addEvent(arrival_Event)
    elif self.type == "ack":
        # ACKs arrive immediately at event_time.
        arrival_time = event_time
        self.arrival_time = arrival_time
        self.dataframe = df
        def success():
            self.sender.processArrivalDataFrame(arrival_time, self.receiver,
                                                "ack", df, df.origin)
        arrival_Event = ProcessDataFrameArrivalEvent(
            self.type, arrival_time, self.sender, self.receiver, df)
        arrival_Event.success = success
        self.GEL.addEvent(arrival_Event)
import sys
import numpy as np
# Make the project sources importable before the local imports below.
sys.path.append('../src')
from DataFrame import DataFrame
from kNN import kNearestNeighbors

# Initialize the dataframe object
df = DataFrame()

# split data to inputs and decision classess
# NOTE(review): X_set/y_set are assigned but the calls below pass
# df.inputs / df.decision_class directly instead.
X_set, y_set = df.inputs, df.decision_class

# print decision class and input columns
#print(df.print_inputs())
#print(df.print_decision_class())
#print(df)

# first version of knn — run once per distance metric
print(
    f'First version of k-nearest neighbors classificator (Euclidean Metric):')
knn = kNearestNeighbors(k=5, metric='euclidean')
knn.OneVsRest(df.inputs, df.decision_class, version='first')
print(
    f'First version of k-nearest neighbors classificator (Manhattan Metric):')
knn = kNearestNeighbors(k=5, metric='manhattan')
knn.OneVsRest(df.inputs, df.decision_class, version='first')
print(
    f'First version of k-nearest neighbors classificator (Minkowski Metric):')
def beginRun(self, runNumber, runRecord):
    """Start of run: bind a fresh DataFrame keyed by the run number."""
    frame = DataFrame(runNumber)
    self.df = frame
def test_mean(self):
    """Grouping by x and averaging x within each group reproduces x itself."""
    frame = DataFrame(x=scipy.random.randint(0, 10, 100))
    summary = frame.group_by('x').summarize(mu=mean('x'))
    self.assertAllClose(summary.x, summary.mu)
def create_from_csv(csv_path, sep=";"):
    """Read the file at csv_path and wrap its parsed contents in a DataFrame.

    The raw text is split with *sep* (default ';'); the first parsed row
    supplies the labels and the remainder the data rows.
    """
    raw_text = CsvReader.__read_from_file(csv_path)
    parsed = CsvReader.__create_data_frame_from_read_data(raw_text, sep=sep)
    frame_labels = CsvReader.create_labels(parsed)
    frame_rows = CsvReader.create_data(parsed)
    return DataFrame(labels=frame_labels, data=frame_rows)
def processArrivalDataFrame(self, event_time: float, receiver: "Host", _type: str, df = None, origin = None):
    """
    create one of the following arrival event and put it to the GEL event list
    internal DF: df created in this host
        success => schedule sense event to see if the channel is idle to process the df
        failure => put into the buffer
    external DF: df from external host to this host
        success => schedule receive event and then return an ACK latter
        failure => no failure
    ACK DF: ACK from external host
        success => take the next df from the buffer and process it
        failure => no failure
    """
    sender = self
    # NOTE(review): only `failure` (still None) is used below, as the failure
    # callback of SuccessTransferEvent; `success`/`arrival` are placeholders.
    success = None
    failure = None
    arrival = None
    if _type == "internal DF":
        """
        Schedule next event
        To create a sense channel event, or put it into the buffer
        """
        # Keep the arrival process going until the packet budget is exhausted.
        if self.GEL.packet_counter < self.GEL.TOTAL_PACKET:
            new_arrival_event = ScheduleDataFrameEvent(_type, event_time,
                                                       sender, receiver,
                                                       self.GEL, sender)
            self.GEL.addEvent(new_arrival_event)
        # Idle host: sense the channel after senseTime; otherwise buffer the frame.
        if self.status == "idle":
            sense_event_time = event_time + self.senseTime
            self.createSenseChannelEvent(sense_event_time, df, "df, stage 0",
                                         df.origin)
        else:
            self.buffer.insert_dataframe(df)
    elif _type == "external DF":
        """
        create an ack packet, and then create a SenseChannel Event for this ack packet
        """
        ack_time = event_time
        sense_event_time = event_time + self.senseTime
        # print("ack", ack_time, df.sender, df.receiver, df.id, df.origin)
        # The ACK mirrors the incoming frame's addressing and ids.
        ack = DataFrame("ack", ack_time, df.sender, df.receiver, df.id,
                        df.origin)
        ack.global_Id = df.global_Id
        ack.size = 64  # fixed ACK frame size — presumably bytes; TODO confirm unit
        self.createSenseChannelEvent(sense_event_time, ack, "ack, stage 0",
                                     df.origin)
    elif _type == "ack":
        success_time = event_time + 0
        def success():
            "to get the unacked event from the notAckedDict and then acknowledge the packet"
            unacked = self.notACKedDict[df.id]
            unacked.ACKed = True
            # If frames are waiting, start transmitting the next one now.
            if len(self.buffer.array) != 0:
                next_df = self.origin.buffer.popleft()
                self.createSenseChannelEvent(event_time, next_df,
                                             "df, stage 0", df.origin)
        success_event = SuccessTransferEvent(success_time, df, success,
                                             failure, df.origin)
        self.GEL.addEvent(success_event)
def initData(self):
    """Initialise the data model backing this window: an empty DataFrame."""
    frame = DataFrame()
    self.dataFrame = frame
class MainWindow(QWidget):
    """Main application window: lets the user pick a .npy file and browse
    its array slice by slice on a matplotlib canvas."""

    def __init__(self):
        super().__init__()
        self.initData()
        self.initUI()
        self.createSelectGroupBox()
        self.createViewGroupBox()
        mainLayout = QVBoxLayout()
        mainLayout.addWidget(self.selectGroupBox)
        mainLayout.addWidget(self.viewGroupBox)
        # mainLayout.addWidget(self.messageGroupBox)
        self.setLayout(mainLayout)

    def initData(self):
        # Data model: holds the loaded array, its path and the current slice index.
        self.dataFrame = DataFrame()

    def initUI(self):
        QToolTip.setFont(QFont('SansSerif', 10))
        self.setToolTip('This is a <b>QWidget</b> widget')
        # btn = QPushButton('Button', self)
        # btn.setToolTip('This is a <b>QPushButton</b> widget')
        # btn.resize(btn.sizeHint())
        # btn.move(50, 50)
        self.setGeometry(300, 300, 640, 480)
        self.setWindowTitle('NumpyViewer')
        self.show()

    def createSelectGroupBox(self):
        '''
        Control panel for the file-loading module.
        '''
        # create groupbox
        self.selectGroupBox = QGroupBox("Select File")
        self.selectGroupBox.setFixedHeight(80)
        # create widgets
        self.selectButton = QPushButton('Select a file')
        # widgets connect
        self.selectButton.clicked.connect(self.selectButtonClicked)
        # layout
        selectLayout = QHBoxLayout()
        selectLayout.addWidget(self.selectButton)
        self.selectGroupBox.setLayout(selectLayout)

    def createViewGroupBox(self):
        '''
        Build the view area: the plot canvas plus the navigation buttons.
        '''
        # create groupbox
        self.viewGroupBox = QGroupBox("View")
        # create widgets
        # self.viewBox = QLabel('Show Image')
        # QLabel is widget, but QPixmap is not.
        # self.viewBox.setPixmap(QPixmap('./64.png'))
        self.static_canvas = FigureCanvas(Figure(figsize=(10, 10)))
        self.viewPlot = self.static_canvas.figure.subplots()
        self.lastButton = QPushButton('Last')
        self.nextButton = QPushButton('Next')
        self.levelLabel = QLabel('level:')  # Show level
        # connect buttons
        self.nextButton.clicked.connect(self.nextButtonClicked)
        self.lastButton.clicked.connect(self.lastButtonClicked)
        # layout
        viewLayout = QHBoxLayout()
        # viewLayout.addWidget(self.viewBox)
        viewLayout.addWidget(self.static_canvas)
        ## sub layout
        viewButtonsLayout = QVBoxLayout()
        viewButtonsLayout.addWidget(self.lastButton)
        viewButtonsLayout.addWidget(self.nextButton)
        viewButtonsLayout.addWidget(self.levelLabel)
        viewLayout.addLayout(viewButtonsLayout)
        self.viewGroupBox.setLayout(viewLayout)
        # self.viewLabel = QLabel("View:o")
        # self.iconComboBox = QComboBox()
        # self.iconComboBox.addItem(QIcon(':/images/bad.png'), "Bad")
        # self.iconComboBox.addItem(QIcon(':/images/heart.png'), "Heart")
        # self.iconComboBox.addItem(QIcon(':/images/trash.png'), "Trash")
        # self.showIconCheckBox = QCheckBox("Show icon")
        # self.showIconCheckBox.setChecked(True)
        # iconLayout = QHBoxLayout()
        # iconLayout.addWidget(self.iconLabel)
        # iconLayout.addWidget(self.iconComboBox)
        # iconLayout.addStretch()
        # iconLayout.addWidget(self.showIconCheckBox)
        # self.iconGroupBox.setLayout(iconLayout)

    def update_level_label(self):
        # Reflect the currently displayed slice index in the label.
        self.levelLabel.setText('level:' + str(self.dataFrame.index))

    def selectButtonClicked(self):
        # Open a file picker restricted to .npy files, then load and show slice 0.
        fileName1, filetype = QFileDialog.getOpenFileName(
            self, "选取Numpy矩阵文件", "./",
            "Numpy Files (*.npy)")  # extension filter; options are ';;'-separated
        if fileName1 is not None and fileName1 != '':
            self.dataFrame.path = fileName1
            self.dataFrame.ReadFile()
            self.dataFrame.index = 0
            self.dataFrame.ShowInPlot(self.viewPlot)
            self.update_level_label()
        # draw something
        # self.viewPlot = self.static_canvas.figure.subplots()
        # height, width= self.dataFrame.dataArray[0].shape
        # self.viewBox.setPixmap(QPixmap())
        # print(fileName1)
        print(filetype)

    def nextButtonClicked(self):
        # Advance one slice unless already at the last one.
        if (self.dataFrame.index + 1 >= self.dataFrame.dataArray.shape[0]):
            pass
        else:
            self.dataFrame.index = self.dataFrame.index + 1
            self.dataFrame.ShowInPlot(self.viewPlot)
            self.update_level_label()

    def lastButtonClicked(self):
        # Step back one slice unless already at the first one.
        if (self.dataFrame.index - 1 < 0):
            pass
        else:
            self.dataFrame.index = self.dataFrame.index - 1
            self.dataFrame.ShowInPlot(self.viewPlot)
            self.update_level_label()
from DataFrame import DataFrame
from itertools import product

if __name__ == "__main__":
    # Demo / smoke-test script exercising the DataFrame query API.
    df = DataFrame.read_csv(csv_path='test.csv')
    raw = df.copy()
    # Chained query: rows whose description starts with "PAU".
    raw.select.where("description").prefix("PAU")().print(10)
    # Convert the numeric columns from strings to floats in place.
    df["value"] = list(map(float, df["value"]))
    df["sp_value"] = list(map(float, df["sp_value"]))
    # Compound filter: PAU rows at setpoint 70 whose value is out of band.
    df.select.where("description").prefix("PAU").where("sp_value").equal(70).where("value").Not.between(69.8, 70.2)().sort("value").print(10)
    print(df.select.where("description").contain("R").count())
    # raw.append_csv("test.csv")
    # Column selection by index and by name.
    print(df[[3, 5]])
    df[["time", "value"]].print(10, highlight_rows=[1, 2, 3, 4])
    # Build a 1000-row frame from the cartesian product of three digit ranges.
    lst = list(range(10))
    mat = [[i, j, k] for i, j, k in product(lst, lst, lst)]
    df = DataFrame.read_matrix(matrix=mat)
    print(df.variance("col_1"))
    # Element assignment and frame arithmetic (+ / -).
    a = DataFrame.read_matrix(matrix=[[1, 1, 1], [2, 2, 2], [3, 3, 3]])
    b = a.copy()
    a[2] = [123, 234, 345]
    c = a + b
    d = c - a
    a.print()
    b.print()
    c.print()
    d.print()
    # NOTE(review): this rebinding of `d` to a plain dict is the last visible
    # statement; presumably a row-append demo follows in the full file.
    d = {"date": "2019-01-01", "person": "Mark", "value": 20}
def __init__(self, _type, event_time, sender, receiver, gel, origin=None, df=None):
    """Schedule a data-frame arrival of the given _type ("internal DF",
    "external DF" or "ack") by enqueueing a ProcessDataFrameArrivalEvent
    on the global event list (gel)."""
    self.type = _type
    self.event_time = event_time
    self.sender = sender
    self.receiver = receiver
    # Default the origin to the sender when none is given.
    if origin == None:
        self.origin = sender
    else:
        self.origin = origin
    self.GEL = gel
    self.dataframe = df
    self.result_description = ""
    if self.type == "internal DF":
        """
        The origin of an internal DF is the sender
        The origin of an external DF is the origin of the df
        The origin of an ack is the origin of the df
        """
        # NOTE(review): the config file is re-read on every internal-DF event;
        # hoisting ARRIVE_RATE to module/init scope would avoid repeated I/O.
        config = configparser.ConfigParser()
        config.read("configuration_file.ini")
        ARRIVE_RATE = float(config["DEFAULT"]["ARRIVE_RATE"])
        # Internal frames arrive after an exponentially distributed delay.
        arrival_time = event_time + negative_exponential_distribution(
            ARRIVE_RATE)
        df = DataFrame("data", arrival_time, self.sender, self.receiver,
                       self.sender.ackId, origin=self.origin)
        df.global_Id = self.GEL.packet_counter
        df.ACKed = False
        self.arrival_time = arrival_time
        self.dataframe = df
        self.GEL.packet_counter += 1
        self.GEL.packet_array.append(df)
        self.sender.ackId += 1
        # Closure run when the arrival event fires: hand the frame to the host.
        def success():
            self.sender.processArrivalDataFrame(arrival_time, self.receiver,
                                                "internal DF", df, df.origin)
        arrival_Event = ProcessDataFrameArrivalEvent(
            self.type, arrival_time, self.sender, self.receiver, df)
        arrival_Event.success = success
        # Tiny epsilon keeps this event strictly after same-time events.
        self.arrival_time += 0.00000001
        self.GEL.addEvent(arrival_Event)
    elif self.type == "external DF":
        # External frames arrive (almost) immediately; epsilon for ordering.
        arrival_time = event_time + 0.00000001
        self.arrival_time = arrival_time
        self.dataframe = df
        def success():
            self.sender.processArrivalDataFrame(arrival_time, self.receiver,
                                                "external DF", df, df.origin)
        arrival_Event = ProcessDataFrameArrivalEvent(
            self.type, arrival_time, self.sender, self.receiver, df)
        arrival_Event.success = success
        self.GEL.addEvent(arrival_Event)
    elif self.type == "ack":
        arrival_time = event_time + 0.00000001
        # ACKs travel in the reverse direction: swap sender and receiver.
        original_sender = self.sender
        original_receiver = self.receiver
        self.sender = original_receiver
        self.receiver = original_sender
        self.arrival_time = arrival_time
        self.dataframe = df
        def success():
            """ The sender process the arrived ack, and then """
            self.sender.processArrivalDataFrame(arrival_time, self.receiver,
                                                "ack", df, df.origin)
        arrival_Event = ProcessDataFrameArrivalEvent(
            self.type, arrival_time, self.sender, self.receiver, df)
        arrival_Event.success = success
        self.GEL.addEvent(arrival_Event)
class TestDataFrame(TestCase):
    """Unit tests for the project DataFrame: construction, indexing, slicing,
    value swapping, popping and copying.

    Fix: the deprecated unittest aliases assertEquals/assertNotEquals (removed
    in Python 3.12) are replaced with assertEqual/assertNotEqual; a dead
    trailing `pass` in setUp is removed. Behavior is otherwise unchanged.
    """

    def setUp(self):
        """Build a fresh DataFrame from the shared test_data fixture."""
        self.data_frame = DataFrame(labels=test_data[0], data=test_data[1:])
        # Expected labels and rows, kept separately for direct comparison.
        self.labels = [
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'type'
        ]
        self.data = test_data[1:]

    def test_indexing(self):
        self.data_frame.reindex()
        for index, item in enumerate(self.data_frame):
            self.assertEqual(index, item.index)

    def test_createDataFrame_labels(self):
        self.assertEqual(len(self.data_frame.labels), len(test_data[0]))

    def test_createDataFrame_data(self):
        self.assertEqual(len(self.data_frame.data), len(test_data) - 1)

    def test_label_correct(self):
        for item_loc, item_to_test in zip(self.labels, self.data_frame.labels):
            self.assertEqual(item_loc, item_to_test)

    def test_data_correct(self):
        for line_loc, line_to_test in zip(test_data[1:], self.data_frame.data):
            for item_loc, item_to_test in zip(line_loc, line_to_test):
                self.assertEqual(item_loc, item_to_test)

    def test_get_types_from_data_frame_by_index_last_column(self):
        types = self.data_frame.get_types_of_data(-1)
        a = {'SML', "MID", "BIG"}
        for key in types:
            a.remove(key)
        self.assertEqual(0, len(a))

    def test_get_types_from_data_frame_by_label_name(self):
        types = self.data_frame.get_types_of_data('type')
        a = {'SML', "MID", "BIG"}
        for key in types:
            a.remove(key)
        self.assertEqual(0, len(a))

    def test_get_types_from_data_frame_by_label_a_column(self):
        types = self.data_frame.get_types_of_data('a')
        a = {1, 2, 3, 4, 5, 6, 7, 8, 9, 0}
        for key in types:
            a.remove(key)
        self.assertEqual(0, len(a))

    def test_indexing_check_labels_01(self):
        data = self.data_frame[1, 2]
        for item_loc, item_to_check in zip(self.labels[1], data.labels):
            self.assertEqual(item_loc, item_to_check,
                             'wrong labels in indexing')

    def test_indexing_check_labels_02(self):
        data = self.data_frame[0, 2]
        for item_loc, item_to_check in zip(self.labels[0], data.labels):
            self.assertEqual(item_loc, item_to_check,
                             'wrong labels in indexing')

    def test_indexing_check_labels_03(self):
        data = self.data_frame[-1, 2]
        for item_loc, item_to_check in zip(self.labels[-1], data.labels):
            self.assertEqual(item_loc, item_to_check,
                             'wrong labels in indexing')

    def test_indexing_check_labels_04(self):
        data = self.data_frame[-5, 2]
        for item_loc, item_to_check in zip(self.labels[-5], data.labels):
            self.assertEqual(item_loc, item_to_check,
                             'wrong labels in indexing')

    def test_indexing_check_data_01(self):
        data = self.data_frame[:, 1]
        for item_loc, item_to_check in zip(self.data[1], data.data[0]):
            self.assertEqual(item_loc, item_to_check, 'wrong data in indexing')

    def test_indexing_check_data_02(self):
        data = self.data_frame[:, -1]
        for item_loc, item_to_check in zip(self.data[-1], data.data[0]):
            self.assertEqual(item_loc, item_to_check, 'wrong data in indexing')

    def test_indexing_check_data_03(self):
        data = self.data_frame[:, 100]
        for item_loc, item_to_check in zip(self.data[100], data.data[0]):
            self.assertEqual(item_loc, item_to_check, 'wrong data in indexing')

    def test_slicing_only_col_check_labels_01(self):
        data = self.data_frame[1:]
        for item_loc, item_to_check in zip(self.labels[1:], data.labels):
            self.assertEqual(item_loc, item_to_check, 'wrong labels in slicing')

    def test_slicing_only_col_check_labels_02(self):
        data = self.data_frame[:-1]
        for item_loc, item_to_check in zip(self.labels[:-1], data.labels):
            self.assertEqual(item_loc, item_to_check, 'wrong labels in slicing')

    def test_slicing_only_col_check_labels_03(self):
        data = self.data_frame[1:-1]
        for item_loc, item_to_check in zip(self.labels[1:-1], data.labels):
            self.assertEqual(item_loc, item_to_check, 'wrong labels in slicing')

    def test_slicing_only_col_check_labels_04(self):
        data = self.data_frame[2:5]
        for item_loc, item_to_check in zip(self.labels[2:5], data.labels):
            self.assertEqual(item_loc, item_to_check, 'wrong labels in slicing')

    def test_slicing_check_labels_01(self):
        data = self.data_frame[2:5, 2]
        for item_loc, item_to_check in zip(self.labels[2:5], data.labels):
            self.assertEqual(item_loc, item_to_check, 'wrong labels in slicing')

    def test_slicing_check_labels_02(self):
        data = self.data_frame[:, 2]
        for item_loc, item_to_check in zip(self.labels, data.labels):
            self.assertEqual(item_loc, item_to_check, 'wrong labels in slicing')

    def test_slicing_check_labels_03(self):
        data = self.data_frame[3:, 2]
        for item_loc, item_to_check in zip(self.labels[3:], data.labels):
            self.assertEqual(item_loc, item_to_check, 'wrong labels in slicing')

    def test_slicing_check_data_01(self):
        data = self.data_frame[:, 1:10]
        for line_loc, line_to_check in zip(self.data[1:10], data.data):
            for item_loc, item_to_check in zip(line_loc, line_to_check):
                self.assertEqual(item_loc, item_to_check,
                                 'wrong data in slicing')

    def test_slicing_check_data_02(self):
        data = self.data_frame[:, 12:120]
        for line_loc, line_to_check in zip(self.data[12:120], data.data):
            for item_loc, item_to_check in zip(line_loc, line_to_check):
                self.assertEqual(item_loc, item_to_check,
                                 'wrong data in slicing')

    def test_slicing_check_data_03(self):
        data = self.data_frame[:, 19:-5]
        for line_loc, line_to_check in zip(self.data[19:-5], data.data):
            for item_loc, item_to_check in zip(line_loc, line_to_check):
                self.assertEqual(item_loc, item_to_check,
                                 'wrong data in slicing')

    def test_slicing_check_data_04(self):
        data = self.data_frame[:4, 19:-5]
        for line_loc, line_to_check in zip(self.data[19:-5], data.data):
            for item_loc, item_to_check in zip(line_loc[:4], line_to_check):
                self.assertEqual(item_loc, item_to_check,
                                 'wrong data in slicing')

    def test_slicing_check_data_05(self):
        data = self.data_frame[1:6, 54:120]
        for line_loc, line_to_check in zip(self.data[54:120], data.data):
            for item_loc, item_to_check in zip(line_loc[1:6], line_to_check):
                self.assertEqual(item_loc, item_to_check,
                                 'wrong data in slicing')

    def test_swap_values_one(self):
        self.data_frame.swap_values('type', {"MID": "@MID@"})
        types = self.data_frame.get_types_of_data('type')
        self.assertTrue("@MID@" in types)

    def test_swap_values_all(self):
        self.data_frame.swap_values('type', {
            "SML": "TINY",
            "MID": "@MID@",
            "BIG": "HUGE"
        })
        types = self.data_frame.get_types_of_data('type')
        self.assertTrue("@MID@" in types)
        self.assertTrue("TINY" in types)
        self.assertTrue("HUGE" in types)
        self.assertTrue("SML" not in types)
        self.assertTrue("MID" not in types)
        self.assertTrue("BIG" not in types)

    def test_check_if_correct_values_are_teken(self):
        data = self.data_frame.get_values_equal_to('type', "SML")
        self.assertEqual(len(data.data), 50)
        data_sliced = data[-1, :]
        for item in data_sliced.data:
            self.assertEqual(item[0], 'SML')

    def test_pop_item_int_index_0(self):
        data_raw = self.data_frame.pop_item(0)
        expected_values = [0] * 10 + ['SML']
        for exp_item, test_item in zip(expected_values, data_raw):
            self.assertEqual(exp_item, test_item)

    def test_pop_item_int_index_50(self):
        data_raw = self.data_frame.pop_item(50)
        expected_values = [0] * 10 + ['MID']
        for exp_item, test_item in zip(expected_values, data_raw):
            self.assertEqual(exp_item, test_item)

    def test_pop_item_int_index_100(self):
        data_raw = self.data_frame.pop_item(100)
        expected_values = [0] * 10 + ['BIG']
        for exp_item, test_item in zip(expected_values, data_raw):
            self.assertEqual(exp_item, test_item)

    def test_len_of_data_for_poping(self):
        items_in_frame = len(self.data_frame)
        for item in range(items_in_frame):
            self.assertEqual(len(self.data_frame), items_in_frame - item)
            self.data_frame.pop_item(item)
        self.assertEqual(len(self.data_frame), 0)

    def test_copy_dataframe_if_is_correct_instance(self):
        copied_dataframe = copy(self.data_frame)
        self.assertTrue(isinstance(copied_dataframe, DataFrame))

    def test_copy_correct_values(self):
        copied_dataframe = copy(self.data_frame)
        for rawdata_base, rawdata_copied in zip(self.data_frame,
                                                copied_dataframe):
            self.assertEqual(rawdata_base.index, rawdata_copied.index)
            for item_base, item_copied in zip(rawdata_base, rawdata_copied):
                self.assertEqual(item_base, item_copied)

    def test_if_inner_objects_are_diffrent_data(self):
        # A copy must not share row objects with the original.
        copied_dataframe = copy(self.data_frame)
        copied_dataframe.data[0][0] = "Mu point"
        self.assertEqual(copied_dataframe.data[0].index,
                         self.data_frame.data[0].index)
        self.assertNotEqual(copied_dataframe.data[0][0],
                            self.data_frame.data[0][0])

    def test_if_inner_objects_are_diffrent_lables(self):
        # A copy must not share the labels list with the original.
        copied_dataframe = copy(self.data_frame)
        copied_dataframe.labels[0] = "Mu point"
        self.assertNotEqual(copied_dataframe.labels[0],
                            self.data_frame.labels[0])

    def test_check_pop_by_position(self):
        self.data_frame.pop_on_poss(50)
        self.assertEqual(self.data_frame.data[50].index, 51)
from DataFrame import DataFrame
from PlotGeneration import PlotGeneration
from LoadData import load_data
from StatisticalTest import StatisticalTest

if __name__ == "__main__":
    # Pipeline: load data, derive rates, emit CSV, render plots, run stats.
    print("Loading data frames. . .")
    load_data()  # load data
    print("Loading statistics. . .")
    DataFrame.FemaleIncidenceRate()  # generate female incidence rates
    DataFrame.FemaleMortalityRate()  # generate female mortality rates
    DataFrame.MaleIncidenceRate()  # generate male incidence rates
    DataFrame.MaleMortalityRate()  # generate male morality rates
    print("Generating CSV file. . .")
    PlotGeneration.create_csv()  # generate the CSV file to create the map
    print("Generating map. . .")
    PlotGeneration.generate_choropleth()  # map creation
    PlotGeneration.generate_sankey()  # sankey plot generation
    PlotGeneration.generate_boxplot()
    print("Calculating statistics. . .\n"
          )  # T-Test generation, calculating averages
    StatisticalTest()
    StatisticalTest.box_plot_statistics(
        PlotGeneration.map_data['total_incidence'],
        PlotGeneration.map_data['state_name'], 'incidence_rate')
    print("")
def __init__(self, master=None):
    """Initialise the frame under *master*, delegating field setup to DataFrame."""
    # Keep a handle to the parent widget/controller.
    self.master = master
    # Explicit base-class call rather than super(): presumably deliberate for
    # this class hierarchy — confirm the MRO before converting to super().
    DataFrame.__init__(self, sp.load_fields, sp.load_field_types)
def test_sd(self):
    """Each group holds a single distinct x value, so its std deviation is 0."""
    frame = DataFrame(x=scipy.random.randint(0, 10, 100))
    summary = frame.group_by('x').summarize(sd=sd('x'))
    self.assertAllClose(0, summary.sd)