def test_getAllItemsInAuction_Ordering(self):
    datasetAuction = Dataset(
        self.logger, './',
        self.sessionFile.getFilename(),
        self.itemFileAuctionOnly.getFilename(),
        self.currencyFile.getFilename())
    datasetAuction.restore()
    modelAuction = Model(self.logger, datasetAuction, self.currency)

    auctionItems = modelAuction.getAllItemsInAuction()
    auctionItems.sort(key=lambda item: item[ItemField.AUCTION_SORT_CODE])
    for item in auctionItems:
        print('{0} - {1}'.format(item[ItemField.AUTHOR], item[ItemField.AMOUNT]))

    # Check that no block of consecutive items by the same author is larger than two.
    largestBlockSize = 0
    largestBlockAuthor = None
    blockAuthor = None
    blockSize = 0
    for item in auctionItems:
        if blockAuthor is not None and item[ItemField.AUTHOR] == blockAuthor:
            blockSize = blockSize + 1
        else:
            if blockSize > largestBlockSize:
                largestBlockSize = blockSize
                largestBlockAuthor = blockAuthor
            blockAuthor = item[ItemField.AUTHOR]
            blockSize = 1
    # Account for the final block, which the loop above would otherwise miss.
    if blockSize > largestBlockSize:
        largestBlockSize = blockSize
        largestBlockAuthor = blockAuthor
    self.assertGreaterEqual(2, largestBlockSize, 'Author: ' + str(largestBlockAuthor))
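# A minimal standalone sketch of the run-length check verified above, using
# itertools.groupby (the helper name and sample data are illustrative only,
# not part of the test suite):
from itertools import groupby

def largest_author_block(authors):
    # Length of the longest run of consecutive equal authors.
    return max((sum(1 for _ in run) for _, run in groupby(authors)), default=0)

assert largest_author_block(['Fox', 'Fox', 'Wolf', 'Fox']) == 2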
def setUp(self):
    self.logger = logging.getLogger()
    self.testFiles = []
    self.itemFile = Datafile('test.model.items.xml', self.id())
    self.itemFileAuctionOnly = Datafile('test.model.items.auction_only.xml', self.id())
    self.sessionFile = Datafile('test.model.session.xml', self.id())
    self.currencyFile = Datafile('test.model.currency.xml', self.id())
    self.importFileCsv = Datafile('test.model.import.csv', self.id())
    self.importFileTxt = Datafile('test.model.import.txt', self.id())
    self.dataset = Dataset(
        self.logger, './',
        self.sessionFile.getFilename(),
        self.itemFile.getFilename(),
        self.currencyFile.getFilename())
    self.dataset.restore()
    self.currency = Currency(
        self.logger, self.dataset, currencyCodes=['czk', 'eur'])
    self.model = Model(self.logger, self.dataset, self.currency)
def setUp(self):
    self.logger = logging.getLogger()
    self.itemFile = Datafile('test.model.items.xml', self.id())
    self.sessionFile = Datafile('test.model.session.xml', self.id())
    self.currencyFile = Datafile('test.model.currency.xml', self.id())
    self.dataset = Dataset(
        self.logger, './',
        self.sessionFile.getFilename(),
        self.itemFile.getFilename(),
        self.currencyFile.getFilename())
def __init__(self):
    super().__init__()
    self.ui = Ui_Dataset()
    self.ui.setupUi(self)
    self.all_thumbnails = []
    self.selected_thumbnails: Set[Thumbnail] = set()
    self.ui.image_list_widget.itemSelectionChanged.connect(
        self.on_changed_image_list_selection)
    self.ui.delete_images_button.clicked.connect(
        self.on_clicked_delete_images_button)
    self.ui.train_button.clicked.connect(self.on_clicked_train_button)
    self.ui.camera_and_images_menu = QMenu()
    self.ui.camera_and_images_menu.addAction(self.ui.select_images_action)
    self.ui.camera_and_images_menu.addAction(self.ui.camera_action)
    self.ui.camera_and_images_button.setMenu(self.ui.camera_and_images_menu)
    self.ui.select_images_action.triggered.connect(
        self.on_clicked_select_images_button)
    self.ui.camera_action.triggered.connect(self.on_clicked_camera_button)
    self.ui.image_list_widget.setCurrentItem(
        self.ui.image_list_widget.topLevelItem(0).child(0))  # FIXME: refactor
    self.ui.image_list_widget.expandAll()
    self._reload_images(Dataset.Category.TRAINING_OK)
    self.__reload_recent_training_date()
    self.capture_dialog: Optional[ImageCaptureDialog] = None
    self.preview_window = PreviewWindow()
    self.watcher = QFileSystemWatcher(self)
    self.watcher.addPaths([
        str(Dataset.images_path(Dataset.Category.TRAINING_OK)),
        str(Dataset.images_path(Dataset.Category.TEST_OK)),
        str(Dataset.images_path(Dataset.Category.TEST_NG))
    ])
    self.watcher.directoryChanged.connect(self.on_dataset_directory_changed)
    self.select_area_dialog = None
    self.msgBox = None
    LearningModel.default().training_finished.connect(self.on_finished_training)
def train(self):
    self.__model = NoveltyDetector()  # FIXME: cannot update weights without reinitialization...
    self.__model.fit_in_dir(str(Dataset.trimmed_path(Dataset.Category.TRAINING_OK)))
    self.__model.save(LearningModel.__weight_file_path(cam_index=0))
    Project.save_latest_training_date()
    self.__should_test = True
    self.training_finished.emit()
def on_finished_selecting_area(self, data: TrimmingData):
    categories = [
        Dataset.Category.TRAINING_OK, Dataset.Category.TEST_OK,
        Dataset.Category.TEST_NG
    ]
    truncated_image_paths = []
    for category in categories:
        dir_path = Dataset.images_path(category)
        save_path = Dataset.trimmed_path(category)
        if os.path.exists(save_path):
            shutil.rmtree(save_path)
        os.mkdir(save_path)
        if not data.needs_trimming:
            copy_tree(str(dir_path), str(save_path))
        else:
            file_list = os.listdir(dir_path)
            file_list = [
                img for img in file_list
                if Path(img).suffix in ['.jpg', '.jpeg', '.png', '.gif', '.bmp']
            ]
            for file_name in file_list:
                truncated_image_path = Dataset.trim_image(
                    os.path.join(dir_path, file_name), save_path, data)
                if truncated_image_path:
                    file_name = os.path.basename(truncated_image_path)
                    shutil.move(
                        truncated_image_path,
                        os.path.join(
                            Dataset.images_path(Dataset.Category.TRUNCATED),
                            file_name))
                    truncated_image_paths.append(truncated_image_path)
    Project.save_latest_trimming_data(data)

    # alert for moving truncated images
    if truncated_image_paths:
        self.msgBox = QMessageBox()
        # Message: "<N> images could not be loaded. They were moved to the
        # truncated folder. Start training anyway?"
        self.msgBox.setText(
            str(len(truncated_image_paths)) +
            '枚の画像を読み込めませんでした. これらの画像はtruncatedフォルダに移動されました.\n\n' +
            'このままトレーニングを開始しますか?')
        self.msgBox.setStandardButtons(self.msgBox.Yes | self.msgBox.No)
        self.msgBox.setDefaultButton(self.msgBox.Yes)
        reply = self.msgBox.exec()
        if reply == self.msgBox.No:
            return
    # start training
    LearningModel.default().start_training()
def test(self, predict_training=False):
    try:
        _, pred_of_ok_images = self.__model.predict_in_dir(
            str(Dataset.trimmed_path(Dataset.Category.TEST_OK)))
        _, pred_of_ng_images = self.__model.predict_in_dir(
            str(Dataset.trimmed_path(Dataset.Category.TEST_NG)))
        if predict_training:
            _, pred_of_train_images = self.__model.predict_in_dir(
                str(Dataset.trimmed_path(Dataset.Category.TRAINING_OK)))
            self.test_results.reload(
                distances_of_ok_images=pred_of_ok_images,
                distances_of_ng_images=pred_of_ng_images,
                distances_of_train_images=pred_of_train_images)
        else:
            self.test_results.reload(
                distances_of_ok_images=pred_of_ok_images,
                distances_of_ng_images=pred_of_ng_images)
        if self.test_results.distances_of_ng_images.size != 0:
            # default threshold is the larger of the max NG distance and the
            # 0.13th percentile (-3 sigma) of OK distances
            self.threshold = max(
                self.test_results.distances_of_ng_images.max(),
                np.percentile(self.test_results.distances_of_ok_images, 0.13))
        self.__should_test = False
    except IndexError:
        # TODO: handle as UndoneTrainingError
        print('TODO: tell the user to train')
    except OSError:
        print('TODO: repair directory for test images')
    finally:
        self.test_finished.emit(predict_training)
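# A minimal sketch of the default-threshold rule applied in test(): the larger
# of the worst NG distance and the 0.13th percentile of the OK distances,
# which approximates the -3 sigma point of a normal distribution (the helper
# name and sample arrays are illustrative only):
import numpy as np

def default_threshold(ok_distances, ng_distances):
    return max(ng_distances.max(), np.percentile(ok_distances, 0.13))

print(default_threshold(np.array([0.1, 0.2, 0.3]), np.array([0.5, 0.9])))  # 0.9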
def setUp(self):
    self.logger = logging.getLogger()
    self.itemFile = Datafile('test.model.items.xml', self.id())
    self.sessionFile = Datafile('test.model.session.xml', self.id())
    self.currencyFile = Datafile('test.model.currency.xml', self.id())
    self.dataset = Dataset(
        self.logger, './',
        self.sessionFile.getFilename(),
        self.itemFile.getFilename(),
        self.currencyFile.getFilename())
    self.dataset.restore()
    self.currency = Currency(
        self.logger, self.dataset, currencyCodes=['czk', 'eur', 'usd'])
def on_clicked_camera_button(self):
    selected_category = self.__selected_dataset_category()
    if selected_category is None:
        print('TODO: disable to select other items')
        return
    del self.capture_dialog
    self.capture_dialog = ImageCaptureDialog(
        image_save_location=str(Dataset.images_path(selected_category)))
    self.capture_dialog.show()
def about(self):
    """
    Show information about this program.

    :return: None.
    """
    title = "About Biodiversity Analysis"
    content = Dataset.license()
    self.mainWindow.alert(title, content, 4)
def start_predict(self, image_paths):
    image_path = image_paths[0]
    trimming_data = Project.latest_trimming_data()
    truncated_image_path = Dataset.trim_image(
        image_path, os.path.dirname(image_path), trimming_data)
    if truncated_image_path:
        # The image could not be trimmed; report its path back to the caller.
        return truncated_image_path
    self.predicting_start.emit()
    predict_thread = threading.Thread(target=self.predict, args=([image_paths]))
    predict_thread.start()
    return
def on_clicked_train_button(self):
    img_suffix_list = ['.jpg', '.jpeg', '.png', '.gif', '.bmp']
    if not [
            img for img in os.listdir(Dataset.images_path(Dataset.Category.TEST_NG))
            if Path(img).suffix in img_suffix_list
    ]:
        self.msgBox = QMessageBox()
        # Message: "The NG-image folder for evaluation is empty. Add at least
        # one NG image before starting training."
        self.msgBox.setText(
            '性能評価用の不良品画像フォルダが空です.\nトレーニングを開始するには不良品画像を1枚以上追加してください.')
        self.msgBox.exec()
        return
    elif not [
            img for img in os.listdir(Dataset.images_path(Dataset.Category.TEST_OK))
            if Path(img).suffix in img_suffix_list
    ]:
        self.msgBox = QMessageBox()
        # Message: "The OK-image folder for evaluation is empty. Add at least
        # one OK image before starting training."
        self.msgBox.setText(
            '性能評価用の良品画像フォルダが空です.\nトレーニングを開始するには良品画像を1枚以上追加してください.')
        self.msgBox.exec()
        return
    elif not [
            img for img in os.listdir(Dataset.images_path(Dataset.Category.TRAINING_OK))
            if Path(img).suffix in img_suffix_list
    ]:
        self.msgBox = QMessageBox()
        # Message: "The OK-image folder for training is empty. Add at least
        # one OK image before starting training."
        self.msgBox.setText(
            'トレーニング用の良品画像フォルダが空です.\nトレーニングを開始するには良品画像を1枚以上追加してください.')
        self.msgBox.exec()
        return
    del self.select_area_dialog
    self.select_area_dialog = SelectAreaDialog()
    self.select_area_dialog.finish_selecting_area.connect(
        self.on_finished_selecting_area)
    self.select_area_dialog.show()
    self.__reload_recent_training_date()
def setUp(self):
    self.logger = logging.getLogger()
    self.itemFile = Datafile('test.model.items.xml', self.id())
    self.sessionFile = Datafile('test.model.session.xml', self.id())
    self.currencyFile = Datafile('test.model.currency.xml', self.id())
    self.dataset = Dataset(
        self.logger, './',
        self.sessionFile.getFilename(),
        self.itemFile.getFilename(),
        self.currencyFile.getFilename())
def train(self):
    dirs = self.config.DATA_DIR
    live_dir = self.config.DATA_DIR_LIVE[0]
    while True:
        for holdout_dir in dirs:
            # Hold one directory out for validation; train on the rest plus
            # the live directory.
            train_dirs = [d for d in dirs if d != holdout_dir]
            train_dirs.append(live_dir)
            train = Dataset(self.config, 'train', train_dirs, holdout_dir)
            epochs = int((self.config.MAX_EPOCH % len(dirs)) / len(dirs)) + self.config.MAX_EPOCH
            self._train(train, self.last_epoch + epochs)
            self.last_epoch += epochs
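# A minimal sketch of the leave-one-out rotation performed by train() above:
# each pass holds one directory out for validation and trains on the remaining
# directories plus the live one (directory names are illustrative only):
dirs = ['dir_a', 'dir_b', 'dir_c']
live_dir = 'dir_live'
for holdout_dir in dirs:
    train_dirs = [d for d in dirs if d != holdout_dir] + [live_dir]
    print(holdout_dir, '<-', train_dirs)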
def main(argv=None):
    # Configurations
    config = Config(gpu='1',
                    root_dir='./data/test/',
                    root_dir_val=None,
                    mode='testing')
    config.BATCH_SIZE = 1

    # Get images and labels.
    dataset_test = Dataset(config, 'test')

    # Build the test graph.
    _M, _s, _b, _C, _T, _imname = _step(config, dataset_test, False)

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver(max_to_keep=50)
    with tf.Session(config=config.GPU_CONFIG) as sess:
        # Restore the model
        ckpt = tf.train.get_checkpoint_state(config.LOG_DIR)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            last_epoch = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            print('**********************************************************')
            print('Restore from Epoch ' + str(last_epoch))
            print('**********************************************************')
        else:
            init = tf.initializers.global_variables()
            last_epoch = 0
            sess.run(init)
            print('**********************************************************')
            print('Train from scratch.')
            print('**********************************************************')

        step_per_epoch = int(len(dataset_test.name_list) / config.BATCH_SIZE)
        with open(config.LOG_DIR + '/test/score.txt', 'w') as f:
            for step in range(step_per_epoch):
                M, s, b, C, T, imname = sess.run([_M, _s, _b, _C, _T, _imname])
                # save the score
                for i in range(config.BATCH_SIZE):
                    _name = imname[i].decode('UTF-8')
                    _line = _name + ',' + str("{0:.3f}".format(M[i])) + ',' \
                          + str("{0:.3f}".format(s[i])) + ',' \
                          + str("{0:.3f}".format(b[i])) + ',' \
                          + str("{0:.3f}".format(C[i])) + ',' \
                          + str("{0:.3f}".format(T[i]))
                    f.write(_line + '\n')
                print(str(step + 1) + '/' + str(step_per_epoch) + ':' + _line, end='\r')
            print("\n")
def main(argv=None):
    # Configurations
    config = Config()
    config.DATA_DIR = ['/data/']
    config.LOG_DIR = './log/model'
    config.MODE = 'training'
    config.STEPS_PER_EPOCH_VAL = 180
    config.display()

    # Get images and labels.
    dataset_train = Dataset(config, 'train')

    # Build a Graph
    model = Model(config)

    # Train the model
    model.compile()
    model.train(dataset_train, None)
def test(self):
    dirs = self.config.DATA_DIR_TEST
    dataset = Dataset(self.config, 'test', dirs)
    for image, dmap, labels in dataset.feed:
        dmap_pred, cls_pred, route_value, leaf_node_mask = self.dtn(
            image, labels, False)
        # leaf counts
        spoof_counts = []
        for leaf in leaf_node_mask:
            spoof_count = tf.reduce_sum(leaf[:, 0]).numpy()
            spoof_counts.append(int(spoof_count))
        cls_total = tf.math.add_n(cls_pred) / len(cls_pred)
        for index, label in enumerate(tf.unstack(labels)):
            cls = cls_total[index].numpy()
            if cls < 0.8 or cls > 1.2:
                logging.info("label: {}, cls: {}".format(label.numpy(), cls))
def get_ng_sample_image_path(self):
    test_ng_path = str(Dataset.images_path(Dataset.Category.TEST_NG))
    test_ng_images = os.listdir(test_ng_path)
    test_ng_images = [
        img for img in test_ng_images
        if Path(img).suffix in ['.jpg', '.jpeg', '.png', '.gif', '.bmp']
    ]
    if not test_ng_images:
        return
    original_image_path = os.path.join(test_ng_path, test_ng_images[0])
    original_image = cv2.imread(original_image_path)
    h, w, c = original_image.shape
    self.h, self.w = h, w
    original_image_shape = QSize(w + 2, h + 10)
    original_image_item = QGraphicsPixmapItem(QPixmap(original_image_path))
    original_image_item.setZValue(0)
    self.original_image_scene = QGraphicsScene()
    self.original_image_scene.addItem(original_image_item)
    self.ui.original_image_view.setScene(self.original_image_scene)
    self.ui.original_image_view.setBaseSize(original_image_shape)
    self.ui.original_image_view.setMaximumSize(original_image_shape)
    self.resize(self.w + 32, self.h + 72)
def _reload_images(self, category: Dataset.Category):
    # reset selection
    self.selected_thumbnails.clear()
    self.ui.delete_images_button.setEnabled(False)
    # reset grid area contents
    current_images_count = self.ui.images_grid_area.count()
    if current_images_count > 0:
        for i in reversed(range(current_images_count)):
            self.ui.images_grid_area.itemAt(i).widget().setParent(None)
    image_paths = sorted(Dataset.images_path(category).iterdir())
    nullable_thumbnails = [
        Thumbnail(path=image_path) for image_path in image_paths
    ]
    self.all_thumbnails = [
        thumbnail for thumbnail in nullable_thumbnails
        if not thumbnail.pixmap.isNull()
    ]
    # Label text: "{n} images"
    self.ui.number_of_images_label.setText(f'{len(self.all_thumbnails)}枚')
    row = 0
    column = 0
    for thumbnail in self.all_thumbnails:
        thumbnail_cell = ThumbnailCell(thumbnail=thumbnail)
        thumbnail_cell.selection_changed.connect(
            self.on_changed_thumbnail_selection)
        thumbnail_cell.double_clicked.connect(self.on_double_clicked_thumbnail)
        self.ui.images_grid_area.addWidget(thumbnail_cell, row, column)
        if column == 4:
            row += 1
            column = 0
        else:
            column += 1
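# The row/column bookkeeping in _reload_images can equivalently be derived
# with divmod, assuming the same fixed width of 5 cells per row (sketch only):
for index in range(12):
    row, column = divmod(index, 5)  # e.g. index 7 -> row 1, column 2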
def on_clicked_select_images_button(self):
    selected_category = self.__selected_dataset_category()
    if selected_category is None:
        print('TODO: disable to select other items')
        return
    # Filter: "Image files (*.jpg *.jpeg *.png *.gif *.bmp)"
    ext_filter = '画像ファイル(*.jpg *.jpeg *.png *.gif *.bmp)'
    # Dialog caption: "Import into dataset"
    source_image_names = QFileDialog.getOpenFileNames(
        caption='データセットに取り込む',
        filter=ext_filter,
        directory=Project.latest_dataset_image_path())[0]
    if source_image_names:
        # Remember the source directory only when something was selected;
        # otherwise source_image_names[0] would raise an IndexError.
        Project.save_latest_dataset_image_path(
            os.path.dirname(source_image_names[0]))
        for source_image_name in source_image_names:
            try:
                # TODO: specify correct camera number
                destination = Dataset.generate_image_path(
                    category=selected_category,
                    cam_number=0,
                    file_extension=Path(source_image_name).suffix)
                shutil.copyfile(source_image_name, destination)
            except shutil.SameFileError:
                print("TODO: fix destination")
def main():
    global TRAIN_FROM_CHECKPOINT

    gpus = tf.config.experimental.list_physical_devices('GPU')
    print(f'GPUs {gpus}')
    if len(gpus) > 0:
        try:
            tf.config.experimental.set_memory_growth(gpus[0], True)
        except RuntimeError:
            pass

    if os.path.exists(TRAIN_LOGDIR):
        shutil.rmtree(TRAIN_LOGDIR)
    writer = tf.summary.create_file_writer(TRAIN_LOGDIR)

    trainset = Dataset('train')
    testset = Dataset('test')
    steps_per_epoch = len(trainset)
    global_steps = tf.Variable(1, trainable=False, dtype=tf.int64)
    warmup_steps = TRAIN_WARMUP_EPOCHS * steps_per_epoch
    total_steps = TRAIN_EPOCHS * steps_per_epoch

    if TRAIN_TRANSFER:
        Darknet = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=YOLO_COCO_CLASSES)
        load_yolo_weights(Darknet, Darknet_weights)  # use darknet weights

    yolo = Create_Yolo(input_size=YOLO_INPUT_SIZE, training=True, CLASSES=TRAIN_CLASSES)

    if TRAIN_FROM_CHECKPOINT:
        try:
            yolo.load_weights(f"{TRAIN_CHECKPOINTS_FOLDER}/{TRAIN_MODEL_NAME}")
        except ValueError:
            print("Shapes are incompatible, transferring Darknet weights")
            TRAIN_FROM_CHECKPOINT = False

    if TRAIN_TRANSFER and not TRAIN_FROM_CHECKPOINT:
        for i, l in enumerate(Darknet.layers):
            layer_weights = l.get_weights()
            if layer_weights != []:
                try:
                    yolo.layers[i].set_weights(layer_weights)
                except Exception:
                    print("skipping", yolo.layers[i].name)

    optimizer = tf.keras.optimizers.Adam()

    def train_step(image_data, target):
        with tf.GradientTape() as tape:
            pred_result = yolo(image_data, training=True)
            giou_loss = conf_loss = prob_loss = 0
            # optimizing process
            grid = 3 if not TRAIN_YOLO_TINY else 2
            for i in range(grid):
                conv, pred = pred_result[i * 2], pred_result[i * 2 + 1]
                loss_items = compute_loss(pred, conv, *target[i], i, CLASSES=TRAIN_CLASSES)
                giou_loss += loss_items[0]
                conf_loss += loss_items[1]
                prob_loss += loss_items[2]
            total_loss = giou_loss + conf_loss + prob_loss

            gradients = tape.gradient(total_loss, yolo.trainable_variables)
            optimizer.apply_gradients(zip(gradients, yolo.trainable_variables))

            # update learning rate
            # about warmup: https://arxiv.org/pdf/1812.01187.pdf&usg=ALkJrhglKOPDjNt6SHGbphTHyMcT0cuMJg
            global_steps.assign_add(1)
            if global_steps < warmup_steps:  # and not TRAIN_TRANSFER:
                lr = global_steps / warmup_steps * TRAIN_LR_INIT
            else:
                lr = TRAIN_LR_END + 0.5 * (TRAIN_LR_INIT - TRAIN_LR_END) * (
                    (1 + tf.cos((global_steps - warmup_steps) / (total_steps - warmup_steps) * np.pi)))
            optimizer.lr.assign(lr.numpy())

        return global_steps.numpy(), optimizer.lr.numpy(), giou_loss.numpy(), \
            conf_loss.numpy(), prob_loss.numpy(), total_loss.numpy()

    validate_writer = tf.summary.create_file_writer(TRAIN_LOGDIR)

    def validate_step(image_data, target):
        with tf.GradientTape() as tape:
            pred_result = yolo(image_data, training=False)
            giou_loss = conf_loss = prob_loss = 0
            grid = 3 if not TRAIN_YOLO_TINY else 2
            for i in range(grid):
                conv, pred = pred_result[i * 2], pred_result[i * 2 + 1]
                loss_items = compute_loss(pred, conv, *target[i], i, CLASSES=TRAIN_CLASSES)
                giou_loss += loss_items[0]
                conf_loss += loss_items[1]
                prob_loss += loss_items[2]
            total_loss = giou_loss + conf_loss + prob_loss

        return giou_loss.numpy(), conf_loss.numpy(), prob_loss.numpy(), total_loss.numpy()

    mAP_model = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=TRAIN_CLASSES)  # create second model to measure mAP

    best_val_loss = 10000  # should be large at start
    for epoch in range(TRAIN_EPOCHS):
        count_train, giou_train, conf_train, prob_train, total_train, lr = 0., 0, 0, 0, 0, 0
        for image_data, target in trainset:
            results = train_step(image_data, target)
            cur_step = results[0] % steps_per_epoch
            count_train += 1
            lr += results[1]
            giou_train += results[2]
            conf_train += results[3]
            prob_train += results[4]
            total_train += results[5]
            print(
                "epoch:{:2.0f} step:{:5.0f}/{}, lr:{:.6f}, giou_loss:{:7.2f}, conf_loss:{:7.2f}, prob_loss:{:7.2f}, total_loss:{:7.2f}"
                .format(epoch, cur_step, steps_per_epoch, results[1],
                        results[2], results[3], results[4], results[5]))

        # writing summary data
        with writer.as_default():
            tf.summary.scalar("lr", lr / count_train, step=epoch)
            tf.summary.scalar("train_loss/total_loss", total_train / count_train, step=epoch)
            tf.summary.scalar("train_loss/giou_loss", giou_train / count_train, step=epoch)
            tf.summary.scalar("train_loss/conf_loss", conf_train / count_train, step=epoch)
            tf.summary.scalar("train_loss/prob_loss", prob_train / count_train, step=epoch)
        writer.flush()

        if len(testset) == 0:
            print("configure TEST options to validate model")
            yolo.save_weights(os.path.join(TRAIN_CHECKPOINTS_FOLDER, TRAIN_MODEL_NAME))
            continue

        count_val, giou_val, conf_val, prob_val, total_val = 0., 0, 0, 0, 0
        for image_data, target in testset:
            results = validate_step(image_data, target)
            count_val += 1
            giou_val += results[0]
            conf_val += results[1]
            prob_val += results[2]
            total_val += results[3]
        # mAP = get_mAP(yolo, testset, score_threshold=TEST_SCORE_THRESHOLD, iou_threshold=TEST_IOU_THRESHOLD)

        # writing validate summary data
        with validate_writer.as_default():
            tf.summary.scalar("validate_loss/total_val", total_val / count_val, step=epoch)
            tf.summary.scalar("validate_loss/giou_val", giou_val / count_val, step=epoch)
            tf.summary.scalar("validate_loss/conf_val", conf_val / count_val, step=epoch)
            tf.summary.scalar("validate_loss/prob_val", prob_val / count_val, step=epoch)
        validate_writer.flush()

        print("\n\ngiou_val_loss:{:7.2f}, conf_val_loss:{:7.2f}, prob_val_loss:{:7.2f}, total_val_loss:{:7.2f}\n\n"
              .format(giou_val / count_val, conf_val / count_val,
                      prob_val / count_val, total_val / count_val))

        if TRAIN_SAVE_CHECKPOINT and not TRAIN_SAVE_BEST_ONLY:
            save_directory = os.path.join(
                TRAIN_CHECKPOINTS_FOLDER,
                TRAIN_MODEL_NAME + "_val_loss_{:7.2f}".format(total_val / count_val))
            yolo.save_weights(save_directory)
        if TRAIN_SAVE_BEST_ONLY and best_val_loss > total_val / count_val:
            save_directory = os.path.join(TRAIN_CHECKPOINTS_FOLDER, TRAIN_MODEL_NAME)
            yolo.save(save_directory)
            best_val_loss = total_val / count_val
        if not TRAIN_SAVE_BEST_ONLY and not TRAIN_SAVE_CHECKPOINT:
            save_directory = os.path.join(TRAIN_CHECKPOINTS_FOLDER, TRAIN_MODEL_NAME)
            yolo.save_weights(save_directory)

    # measure mAP of trained custom model
    try:
        mAP_model.load_weights(save_directory + '/variables/variables')  # use keras weights
        get_mAP(mAP_model, testset, score_threshold=TEST_SCORE_THRESHOLD,
                iou_threshold=TEST_IOU_THRESHOLD)
    except UnboundLocalError:
        print("You don't have saved model weights to measure mAP, "
              "check TRAIN_SAVE_BEST_ONLY and TRAIN_SAVE_CHECKPOINT lines in configs.py")
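# A minimal plain-float sketch of the learning-rate schedule used in
# train_step above: linear warmup followed by a cosine decay from TRAIN_LR_INIT
# down to TRAIN_LR_END (the function name is illustrative only):
import math

def warmup_cosine_lr(step, warmup_steps, total_steps, lr_init, lr_end):
    if step < warmup_steps:
        return step / warmup_steps * lr_init
    progress = (step - warmup_steps) / (total_steps - warmup_steps)
    return lr_end + 0.5 * (lr_init - lr_end) * (1 + math.cos(progress * math.pi))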
class Controller:
    # Constructor Method
    def __init__(self):
        # Attributes
        # General
        self.data = Dataset()
        self.activity_label = [
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
            'K', 'L', 'M', 'O', 'P', 'Q', 'R', 'S', 'T', 'U',
        ]
        # Views
        self.results_view = None
        self.data_import_view = None
        self.pca_graphics_view = None
        self.pca_utilization_view = None
        self.choose_classifier_view = None
        self.feature_selection_view = None

    # Create first screen: screen to select the database and the scenario to be used.
    def start(self):
        # Screens processing
        # Create the first screen
        self.data_import_view = ViewDataImport()
        self.data_import_view.show(self)

    # Pre-process the data according to the chosen database and scenario.
    # Create second screen: choose feature selection method and the number of features.
    def data_import(self, database, scenario):
        # Data processing
        # Set database and scenario attributes and select the dataset to be classified.
        self.data.choose_data(database)
        # Pre-process data as a function of the scenario
        self.scenario = scenario
        self.data.scenario_pre_processing(scenario)
        # Screens processing
        # Destroy data_import_view
        self.data_import_view.dismiss()
        # Create the new screen: feature_selection_view
        self.feature_selection_view = ViewFeatureSelectionAndReduction()
        self.feature_selection_view.show(self)

    # Apply the chosen feature selection.
    # Create the third screen: screen to decide whether to use PCA or not.
    def feature_selection_and_reduction(self, feature_selection_method, number_feature):
        # Save the chosen feature selection method.
        self.data.set_feature_selection_method(feature_selection_method)
        # Apply feature selection
        # K-best method
        if feature_selection_method == 1:
            self.data.dataset = kbest(self.data.dataset, number_feature)
        # Kruskal-Wallis method
        if feature_selection_method == 2:
            self.data.dataset = kruskal_wallis(self.data.dataset, number_feature)
        # Eliminate redundant features that were maintained
        self.data.dataset, features_excluded = redundancy_measure(self.data.dataset)
        self.data.set_features_excluded_by_feature_reduction(features_excluded)
        # Screens processing
        # Destroy feature_selection_view
        self.feature_selection_view.dismiss()
        # Create the new screen: pca_utilization_view
        self.pca_utilization_view = ViewPCAUtilization()
        self.pca_utilization_view.show(self, number_feature)

    # Method to execute the PCA analysis in order to show the two PCA graphics on screen.
    def run_pca_analisys(self):
        # Run PCA Analysis
        explained_variance_, x_values, singular_values_ = pca_analysis(self.data.dataset)
        # Screens processing
        # Destroy pca_utilization_view
        self.pca_utilization_view.dismiss()
        # Create the new screen: pca_graphics_view
        self.pca_graphics_view = ViewPCAGraphics()
        self.pca_graphics_view.show(self, explained_variance_, x_values, singular_values_)

    # If selected, execute PCA feature reduction, then prepare the screen that
    # shows the classifier options.
    def choose_classifier(self, n_features):
        # Apply PCA if the function call came from run_pca_analysis
        if n_features != 0:
            self.data.dataset = run_pca(self.data.dataset, n_features)
        # Screens processing
        # Destroy pca_graphics_view
        if n_features != 0:
            self.pca_graphics_view.dismiss()
        else:
            self.pca_utilization_view.dismiss()
        # Create the new screen: choose_classifier_view
        self.choose_classifier_view = ViewChooseClassifier()
        self.choose_classifier_view.show(self)

    # Core method that runs the chosen classifier and prepares the result to be shown.
    def classify(self, n_runs, n_subsets, classifier, constant_value):
        # Run classification as per user input
        for i in range(0, n_runs):
            # Structure to hold results of classification
            performance = {
                'fp': 0, 'fn': 0, 'tp': 0, 'tn': 0,
                'accuracy': 0,
                'avg_misclassification': 0,
                'misclassification_per_fold': [],
                'avg_misclassification_per_fold': [],
                'sensitivity': 0,
                'specificity': 0
            }
            # Apply K-fold: splitting the dataset
            kf = KFold(n_splits=n_subsets, shuffle=True)
            # K-fold executions
            for idx_train, idx_test in kf.split(self.data.dataset["data"],
                                                self.data.dataset["target"]):
                prediction = []
                # Train data
                x_train = [self.data.dataset["data"][idx] for idx in idx_train]
                x_train = np.asarray(x_train).astype(np.float64)
                y_train = [self.data.dataset["target"][idx] for idx in idx_train]
                # Test data
                x_test = [self.data.dataset["data"][idx] for idx in idx_test]
                x_test = np.asarray(x_test).astype(np.float64)
                y_test = [self.data.dataset["target"][idx] for idx in idx_test]
                # Check the chosen classifier to call the right method
                # Minimum distance classifier (MDC)
                if classifier == 1:
                    prediction = minimum_distance_classifier(x_train, y_train, x_test, y_test)
                # Fisher Discriminant Analysis (Fisher LDA)
                elif classifier == 2:
                    prediction = fisher_discriminant_analisys(x_train, y_train, x_test, y_test)
                # K-Nearest Neighbors (KNN)
                elif classifier == 3:
                    prediction = k_nearest_neighbors(x_train, y_train, x_test, y_test, constant_value)
                # Bayes Classifier
                elif classifier == 4:
                    prediction = bayes_classifier(x_train, y_train, x_test, y_test)
                # Support Vector Machines
                elif classifier == 5:
                    prediction = support_vector_machines(x_train, y_train, x_test, y_test, constant_value)
                # Calculate performance
                performance = performance_measurement(y_test, prediction, self.scenario, performance)
            # Calculate average misclassification
            performance['avg_misclassification'] /= n_subsets
            performance['sensitivity'] /= n_subsets
            performance['specificity'] /= n_subsets
            print_performance(performance)
        # Screens processing
        # Destroy classifier choice view
        # self.choose_classifier_view.dismiss()
        # Create the new screen: results_view
        self.results_view = ViewResult()
        self.results_view.show(self, classifier, performance, self.scenario)

    # Method to run the C-value or K-value test and prepare data to plot.
    def test_k_and_c_value(self, classifier):
        # Variables
        run = 1
        tests_results = []
        constant_values = []
        tests_results_std = []
        if classifier == 3:
            n_runs = 50
            n_subsets = 10
        else:
            n_runs = 10
            n_subsets = 3
        # 50 runs means K = [1, 3, 5, 7, 9, ..., 99]
        for i in range(1, n_runs * 2):
            # Structure to hold results of classification
            performance = {
                'fp': 0, 'fn': 0, 'tp': 0, 'tn': 0,
                'accuracy': 0,
                'avg_misclassification': 0,
                'misclassification_per_fold': [],
                'avg_misclassification_per_fold': [],
                'sensitivity': 0,
                'specificity': 0
            }
            # Check if value is odd
            if i == 1 or i % 2 != 0:
                # Save all constant values used, to use as the x axis on the plot
                constant_values.append(i)
                # Apply K-fold: splitting the dataset
                kf = KFold(n_splits=n_subsets, shuffle=True)
                # K-fold executions
                for idx_train, idx_test in kf.split(self.data.dataset["data"],
                                                    self.data.dataset["target"]):
                    # Train data
                    x_train = [self.data.dataset["data"][idx] for idx in idx_train]
                    x_train = np.asarray(x_train).astype(np.float64)
                    y_train = [self.data.dataset["target"][idx] for idx in idx_train]
                    # Test data
                    x_test = [self.data.dataset["data"][idx] for idx in idx_test]
                    x_test = np.asarray(x_test).astype(np.float64)
                    y_test = [self.data.dataset["target"][idx] for idx in idx_test]
                    # Classifier verification
                    # K-Nearest Neighbors (KNN)
                    if classifier == 3:
                        prediction = k_nearest_neighbors(x_train, y_train, x_test, y_test, i)
                    # Support Vector Machines
                    else:
                        prediction = support_vector_machines(x_train, y_train, x_test, y_test, i)
                    # Calculate performance
                    performance = performance_measurement(y_test, prediction, self.scenario, performance)
                # Calculate averages for each class
                performance['avg_misclassification'] /= n_subsets
                performance['sensitivity'] /= n_subsets
                performance['specificity'] /= n_subsets
                # Save results for plot
                tests_results.append(np.average(performance['avg_misclassification']))
                tests_results_std.append(np.std(performance['avg_misclassification_per_fold']))
                # Debug results
                print("run ", run, " with k=", i)
                # print("average error per class: ", performance['avg_misclassification'])
                # print("all classes error average: ", np.average(performance['avg_misclassification']))
                # print("average all class error per fold :", performance['avg_misclassification_per_fold'])
                # print("error standard deviation per fold", np.std(performance['avg_misclassification_per_fold']))
                run += 1
        return constant_values, np.multiply(tests_results, 100), np.multiply(tests_results_std, 100)
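# A minimal sketch of the KFold index pattern used by classify() and
# test_k_and_c_value() above, shown with toy NumPy data (sketch only, not
# part of the application):
import numpy as np
from sklearn.model_selection import KFold

data = np.arange(20).reshape(10, 2)
target = np.arange(10)
for idx_train, idx_test in KFold(n_splits=5, shuffle=True).split(data):
    x_train, x_test = data[idx_train], data[idx_test]
    y_train, y_test = target[idx_train], target[idx_test]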
from model.trainer import RRSSTrainer
from model.models import *
from model.dataset import Dataset
import tensorflow as tf
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '1'

if __name__ == "__main__":
    tf.config.experimental.set_memory_growth(
        tf.config.experimental.list_physical_devices('GPU')[0], True)

    # Load data
    dataset = Dataset(5, normalize=False)

    # Load model
    model = InteractionNetCNC(units_embed=256, units_conv=256, units_fc=256,
                              pooling='sum', dropout=0.5, activation='relu',
                              target=1, activation_out='linear',
                              regularizer=0.0025,
                              num_atoms=dataset.num_atoms,
                              num_features=dataset.num_features,
                              num_conv_layers_intra=1,
                              num_conv_layers_inter=1,
                              num_fc_layers=2)
def __init__(self):
    self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
    self.classes = utils.read_class_name(cfg.YOLO.CLASSES)
    self.num_classes = len(self.classes)
    self.learn_rate_init = cfg.TRAIN.LEARN_RATE_INIT
    self.learn_rate_end = cfg.TRAIN.LEARN_RATE_END
    self.first_stage_epochs = cfg.TRAIN.FISRT_STAGE_EPOCHS
    self.second_stage_epochs = cfg.TRAIN.SECOND_STAGE_EPOCHS
    self.warmup_periods = cfg.TRAIN.WARMUP_EPOCHS
    self.initial_weight = cfg.TRAIN.INITIAL_WEIGHT
    self.time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
    self.moving_ave_decay = cfg.YOLO.MOVING_AVE_DECAY
    self.max_bbox_per_scale = 150
    self.train_logdir = "./data/log/train"
    self.trainset = Dataset('train')
    self.testset = Dataset('test')
    self.per_epch_num = len(self.trainset)
    # Allow soft placement so ops fall back to an available device automatically.
    self.sess = tf.compat.v1.Session(
        config=tf.compat.v1.ConfigProto(allow_soft_placement=True))

    with tf.name_scope('define_input'):
        self.input_data = tf.compat.v1.placeholder(dtype=tf.float32, name='input_data')
        self.label_sbbox = tf.compat.v1.placeholder(dtype=tf.float32, name='label_sbbox')
        self.label_mbbox = tf.compat.v1.placeholder(dtype=tf.float32, name='label_mbbox')
        self.label_lbbox = tf.compat.v1.placeholder(dtype=tf.float32, name='label_lbbox')
        self.true_mbbox = tf.compat.v1.placeholder(dtype=tf.float32, name='true_mbbox')
        self.true_sbbox = tf.compat.v1.placeholder(dtype=tf.float32, name='true_sbbox')
        self.true_lbbox = tf.compat.v1.placeholder(dtype=tf.float32, name='true_lbbox')
        self.trainable = tf.compat.v1.placeholder(dtype=tf.bool, name='training')

    with tf.name_scope('define_loss'):
        self.model = YOLOV3(self.input_data, self.trainable)
        self.net_var = tf.compat.v1.global_variables()
        self.giou_loss, self.conf_loss, self.prob_loss = self.model.compute_loss(
            self.label_mbbox, self.label_lbbox, self.label_sbbox,
            self.true_lbbox, self.true_sbbox, self.true_mbbox)
        self.loss = self.giou_loss + self.conf_loss + self.prob_loss

    with tf.name_scope('learn_rate'):
        self.global_step = tf.Variable(1.0, dtype=tf.float64, trainable=False, name='global_step')
        warmup_steps = tf.constant(self.warmup_periods * self.per_epch_num)
        train_steps = tf.constant(
            (self.first_stage_epochs + self.second_stage_epochs) * self.per_epch_num,
            dtype=tf.float64, name='train_steps')
        # Warm up linearly, then follow a cosine schedule between the initial
        # and final learning rates.
        self.learn_rate = tf.cond(
            pred=self.global_step < warmup_steps,
            true_fn=lambda: self.global_step / warmup_steps * self.learn_rate_init,
            false_fn=lambda: self.learn_rate_end + 0.5 * (self.learn_rate_init - self.learn_rate_end) *
                (1 + tf.cos((self.global_step - warmup_steps) / (train_steps - warmup_steps) * np.pi)))
        global_step_update = tf.compat.v1.assign_add(self.global_step, 1.0)

    with tf.name_scope("define_weight_decay"):
        moving_ave = tf.train.ExponentialMovingAverage(
            self.moving_ave_decay).apply(tf.compat.v1.trainable_variables())

    with tf.name_scope("define_first_stage_train"):
        self.first_trainable_var_list = []
        for var in tf.compat.v1.trainable_variables():
            var_name = var.op.name
            var_name_mess = str(var_name).split('/')
            if var_name_mess[0] in ['conv_sbbox', 'conv_mbbox', 'conv_lbbox']:
                self.first_trainable_var_list.append(var)
        first_stage_optimizer = tf.compat.v1.train.AdamOptimizer(
            self.learn_rate).minimize(self.loss, var_list=self.first_trainable_var_list)
        # Run the update ops, the optimizer, and the step update before the
        # moving-average update, then expose a single no-op train op.
        with tf.control_dependencies(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)):
            with tf.control_dependencies([first_stage_optimizer, global_step_update]):
                with tf.control_dependencies([moving_ave]):
                    self.train_op_with_frozen_variables = tf.no_op()

    with tf.name_scope("define_second_stage_train"):
        second_stage_trainable_var_list = tf.compat.v1.trainable_variables()
        second_stage_optimizer = tf.compat.v1.train.AdamOptimizer(
            self.learn_rate).minimize(self.loss, var_list=second_stage_trainable_var_list)
        with tf.control_dependencies(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)):
            with tf.control_dependencies([second_stage_optimizer, global_step_update]):
                with tf.control_dependencies([moving_ave]):
                    self.train_op_with_all_variables = tf.no_op()

    with tf.name_scope('loader_and_saver'):
        self.loader = tf.compat.v1.train.Saver(self.net_var)  # restores the network variables
        self.saver = tf.compat.v1.train.Saver(tf.compat.v1.global_variables(), max_to_keep=10)

    with tf.name_scope('summary'):
        tf.summary.scalar("learn_rate", self.learn_rate)
        tf.summary.scalar("giou_loss", self.giou_loss)
        tf.summary.scalar("conf_loss", self.conf_loss)
        tf.summary.scalar("prob_loss", self.prob_loss)
        tf.summary.scalar("total_loss", self.loss)
        logdir = "./data/log/"
        if os.path.exists(logdir):
            shutil.rmtree(logdir)  # recursively delete old log contents
        os.makedirs(logdir)
        # Merge all summaries so TensorBoard can display them, and save the graph.
        self.write_op = tf.compat.v1.summary.merge_all()
        self.summary_writer = tf.compat.v1.summary.FileWriter(logdir, graph=self.sess.graph)
class TestDataset(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        logging.basicConfig(level=logging.DEBUG)

    def setUp(self):
        self.logger = logging.getLogger()
        self.itemFile = Datafile('test.model.items.xml', self.id())
        self.sessionFile = Datafile('test.model.session.xml', self.id())
        self.currencyFile = Datafile('test.model.currency.xml', self.id())
        self.dataset = Dataset(
            self.logger, './',
            self.sessionFile.getFilename(),
            self.itemFile.getFilename(),
            self.currencyFile.getFilename())

    def tearDown(self):
        self.itemFile.clear()
        self.sessionFile.clear()
        self.currencyFile.clear()
        del self.dataset

    def test_restorePersist(self):
        self.dataset.restore()
        self.dataset.persist()
        self.dataset.restore()

    def test_sessionPairs(self):
        self.dataset.restore()
        sessionID = 1051183055

        # read pairs
        pairs = self.dataset.getSessionPairs(sessionID)
        self.assertEqual(
            len(pairs), 2,
            'Model.Session: Expected 2 session pairs, got: %(pairs)s.' % {'pairs': str(pairs)})

        # update existing pair
        self.dataset.updateSessionPairs(sessionID, AddedItemCodes=sessionID)
        pairs = self.dataset.getSessionPairs(sessionID)
        self.assertTrue(
            'AddedItemCodes' in pairs and pairs['AddedItemCodes'] == sessionID,
            "Model.Session: Updated a pair 'AddedItemCodes' but got: %(pairs)s." % {'pairs': str(pairs)})

        # insert new pair
        self.dataset.updateSessionPairs(sessionID, NewPair='NewPair')
        pairs = self.dataset.getSessionPairs(sessionID)
        self.assertTrue(
            len(pairs) == 3 and 'NewPair' in pairs,
            "Model.Session: Added a pair 'NewPair' but got: %(pairs)s." % {'pairs': str(pairs)})

        # delete a pair
        self.dataset.updateSessionPairs(sessionID, CreatedTimestamp=None)
        pairs = self.dataset.getSessionPairs(sessionID)
        self.assertTrue(
            len(pairs) == 2 and 'CreatedTimestamp' not in pairs,
            "Model.Session: Deleted a pair 'CreatedTimestamp' but got: %(pairs)s." % {'pairs': str(pairs)})

    def test_sessionValues(self):
        self.dataset.restore()
        sessionID = 1051183055

        # read existing value
        value = self.dataset.getSessionValue(sessionID, 'CreatedTimestamp')
        self.assertEqual(
            value, '2014-02-16 14:27:16.460836',
            "Model.Session.Value: Expected value '2014-02-16 14:27:16.460836' "
            "of a key 'CreatedTimestamp', got: %(value)s." % {'value': str(value)})

        # read non-existing value
        value = self.dataset.getSessionValue(sessionID, 'NotExisting')
        self.assertEqual(
            value, None,
            "Model.Session.Value: Reading non-existing key 'NotExisting' "
            "returned a value: %(value)s." % {'value': str(value)})

    def test_getItems(self):
        self.dataset.restore()

        # get all items
        items = self.dataset.getItems(None)
        self.assertEqual(len(items), 30)

        # add a new item
        code = self.dataset.getNextItemCode()
        self.assertTrue(self.dataset.addItem(
            code=code, owner=34, title='Clever Bull', author='Redfox',
            medium='Pencil', state='OTHER', initialAmount='12.3',
            charity='43', note=None, importNumber=None))

        # get new item and verify data types
        item = self.dataset.getItem(code)
        self.assertTrue(isinstance(item[ItemField.OWNER], int))
        self.assertTrue(isinstance(item[ItemField.CHARITY], int))
        self.assertTrue(isinstance(item[ItemField.INITIAL_AMOUNT], Decimal))
        self.assertTrue(isinstance(item[ItemField.TITLE], str))
        self.assertTrue(isinstance(item[ItemField.AUTHOR], str))
        self.assertTrue(isinstance(item[ItemField.STATE], str))
        self.assertTrue(isinstance(item[ItemField.CODE], str))
        self.assertTrue(isinstance(item[ItemField.MEDIUM], str))
        self.assertIsNone(item[ItemField.NOTE])
        self.assertIsNone(item[ItemField.IMPORT_NUMBER])

        # get all items again and see whether we have added just one item,
        # i.e. the reserved item is not present
        items = self.dataset.getItems(None)
        self.assertEqual(len(items), 31)

    def test_getUpdateItem(self):
        self.dataset.restore()

        # Get existing item
        item = self.dataset.getItem('A3')
        self.assertNotEqual(item, None)

        # Update title
        newTitle = 'ABCDEFGH'
        item[ItemField.TITLE] = newTitle
        self.assertTrue(self.dataset.updateItem('A3', **item))
        items = self.dataset.getItems('Title=="{0}"'.format(newTitle))
        self.assertEqual(len(items), 1)
        self.assertEqual(items[0][ItemField.CODE], 'A3')

        # Update buyer
        newBuyer = 9999
        self.assertTrue(self.dataset.updateItem('A3', **{ItemField.BUYER: newBuyer}))
        items = self.dataset.getItems('Buyer=="{0}"'.format(newBuyer))
        self.assertEqual(len(items), 1)
        self.assertEqual(items[0][ItemField.CODE], 'A3')

    def test_getNextItemCode(self):
        self.dataset.restore()

        # Create new top based on dataset
        code = self.dataset.getNextItemCode()
        self.assertEqual(code, '57')

        # Advance
        code = self.dataset.getNextItemCode()
        self.assertEqual(code, '58')
        code = self.dataset.getNextItemCode()
        self.assertEqual(code, '59')

        # Jump
        code = self.dataset.getNextItemCode(100)
        self.assertEqual(code, '100')
        code = self.dataset.getNextItemCode()
        self.assertEqual(code, '101')

        # Jumping backward is not allowed
        code = self.dataset.getNextItemCode(50)
        self.assertEqual(code, '102')
        code = self.dataset.getNextItemCode(102)
        self.assertEqual(code, '103')

        # Requesting a suggested code should fail if it is not possible to
        # fulfill the request without updating the counter.
        code = self.dataset.getNextItemCode(102, True)
        self.assertIsNone(code)
        code = self.dataset.getNextItemCode(102)
        self.assertEqual(code, '104')

    def test_normalizeItemImport(self):
        # Item not for sale.
        result, item = self.dataset.normalizeItemImport({
            ImportedItemField.NUMBER: '1',
            ImportedItemField.OWNER: '23',
            ImportedItemField.AUTHOR: 'Wolf',
            ImportedItemField.TITLE: 'Trees',
            ImportedItemField.MEDIUM: '',
            ImportedItemField.NOTE: 'Note',
            ImportedItemField.INITIAL_AMOUNT: '',
            ImportedItemField.CHARITY: ''})
        self.assertEqual(result, Result.SUCCESS)
        self.assertDictEqual(item, {
            ImportedItemField.NUMBER: 1,
            ImportedItemField.OWNER: 23,
            ImportedItemField.AUTHOR: 'Wolf',
            ImportedItemField.TITLE: 'Trees',
            ImportedItemField.MEDIUM: None,
            ImportedItemField.NOTE: 'Note',
            ImportedItemField.INITIAL_AMOUNT: None,
            ImportedItemField.CHARITY: None})

        # Item for sale.
        result, item = self.dataset.normalizeItemImport({
            ImportedItemField.NUMBER: '',
            ImportedItemField.OWNER: '',
            ImportedItemField.AUTHOR: 'Wolf',
            ImportedItemField.TITLE: 'Trees',
            ImportedItemField.MEDIUM: 'Pencils',
            ImportedItemField.NOTE: 'Note',
            ImportedItemField.INITIAL_AMOUNT: '23.50',
            ImportedItemField.CHARITY: '100'})
        self.assertEqual(result, Result.SUCCESS)
        self.assertDictEqual(item, {
            ImportedItemField.NUMBER: None,
            ImportedItemField.OWNER: None,
            ImportedItemField.AUTHOR: 'Wolf',
            ImportedItemField.TITLE: 'Trees',
            ImportedItemField.MEDIUM: 'Pencils',
            ImportedItemField.NOTE: 'Note',
            ImportedItemField.INITIAL_AMOUNT: '23.50',
            ImportedItemField.CHARITY: 100})

        # Invalid amount
        result, item = self.dataset.normalizeItemImport({
            ImportedItemField.NUMBER: '',
            ImportedItemField.OWNER: '23',
            ImportedItemField.AUTHOR: 'Wolf',
            ImportedItemField.TITLE: 'Trees',
            ImportedItemField.MEDIUM: '',
            ImportedItemField.NOTE: 'Note',
            ImportedItemField.INITIAL_AMOUNT: '23.M',
            ImportedItemField.CHARITY: '100'})
        self.assertEqual(result, Result.INVALID_AMOUNT)

        # Invalid charity
        result, item = self.dataset.normalizeItemImport({
            ImportedItemField.NUMBER: '',
            ImportedItemField.OWNER: '23',
            ImportedItemField.AUTHOR: 'Wolf',
            ImportedItemField.TITLE: 'Trees',
            ImportedItemField.MEDIUM: 'Pencil',
            ImportedItemField.NOTE: 'Note',
            ImportedItemField.INITIAL_AMOUNT: '23.5',
            ImportedItemField.CHARITY: 'X'})
        self.assertEqual(result, Result.INVALID_CHARITY)

        # Invalid owner
        result, item = self.dataset.normalizeItemImport({
            ImportedItemField.NUMBER: '',
            ImportedItemField.OWNER: 'DX',
            ImportedItemField.AUTHOR: 'Wolf',
            ImportedItemField.TITLE: 'Trees',
            ImportedItemField.MEDIUM: 'Pencil',
            ImportedItemField.NOTE: 'Note',
            ImportedItemField.INITIAL_AMOUNT: '23.5',
            ImportedItemField.CHARITY: '100'})
        self.assertEqual(result, Result.INVALID_ITEM_OWNER)

        # Invalid number
        result, item = self.dataset.normalizeItemImport({
            ImportedItemField.NUMBER: '??',
            ImportedItemField.OWNER: '',
            ImportedItemField.AUTHOR: 'Wolf',
            ImportedItemField.TITLE: 'Trees',
            ImportedItemField.MEDIUM: 'Pencil',
            ImportedItemField.NOTE: 'Note',
            ImportedItemField.INITIAL_AMOUNT: '23.5',
            ImportedItemField.CHARITY: '100'})
        self.assertEqual(result, Result.INVALID_ITEM_NUMBER)

    def test_getCurrencyInfo(self):
        self.dataset.restore()

        # guarantee that the result matches the order of the input
        currencyInfoList = self.dataset.getCurrencyInfo(['czk', 'eur', 'usd'])
        self.assertListEqual(
            ['czk', 'eur', 'usd'],
            [currencyInfo[CurrencyField.CODE] for currencyInfo in currencyInfoList])
        currencyInfoList = self.dataset.getCurrencyInfo(['usd', 'eur', 'czk'])
        self.assertListEqual(
            ['usd', 'eur', 'czk'],
            [currencyInfo[CurrencyField.CODE] for currencyInfo in currencyInfoList])

        # missing currency
        currencyInfoList = self.dataset.getCurrencyInfo(['usd', 'eur', 'xxx'])
        self.assertListEqual(
            ['usd', 'eur', 'xxx'],
            [currencyInfo[CurrencyField.CODE] for currencyInfo in currencyInfoList])

    def test_updateCurrencyInfo(self):
        self.dataset.restore()

        # update with valid data and various ways of writing the amount
        self.assertEqual(
            Result.SUCCESS,
            self.dataset.updateCurrencyInfo([
                {CurrencyField.CODE: 'czk', CurrencyField.AMOUNT_IN_PRIMARY: '1.23'},
                {CurrencyField.CODE: 'eur', CurrencyField.AMOUNT_IN_PRIMARY: 4.56}]))
        currencyInfoList = self.dataset.getCurrencyInfo(['czk', 'eur', 'usd'])
        self.assertListEqual(
            [Decimal('1.23'), Decimal(4.56), Decimal('19.71')],
            [currencyInfo[CurrencyField.AMOUNT_IN_PRIMARY] for currencyInfo in currencyInfoList])

        # update with invalid data
        self.assertEqual(
            Result.INPUT_ERROR,
            self.dataset.updateCurrencyInfo([{CurrencyField.CODE: 'czk'}]))
        self.assertEqual(
            Result.INPUT_ERROR,
            self.dataset.updateCurrencyInfo([{CurrencyField.AMOUNT_IN_PRIMARY: 4.56}]))
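# Typical entry point for running this test case directly (a sketch; the
# project's actual test runner is not shown here):
if __name__ == '__main__':
    unittest.main()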
# Configure paths
items_controller.ROOT_DIR_CUSTOM_DATA = config.CUSTOM_DATA_FOLDER
auction_controller.ROOT_DIR_CUSTOM_DATA = config.CUSTOM_DATA_FOLDER

# Configure flask
app = flask.Flask("Artshow")
app.root_path = ROOT_PATH
app.register_blueprint(items_controller.blueprint, url_prefix=items_controller.URL_PREFIX)
app.register_blueprint(auction_controller.blueprint, url_prefix=auction_controller.URL_PREFIX)
app.register_blueprint(reconciliation_controller.blueprint, url_prefix=reconciliation_controller.URL_PREFIX)
app.register_blueprint(settings_controller.blueprint, url_prefix=settings_controller.URL_PREFIX)
app.secret_key = config.SESSION_KEY

# Initialize application
dataset = Dataset(logging.getLogger("dataset"), config.DATA_FOLDER)
dataset.restore()
currency = Currency(logging.getLogger("currency"), dataset, currencyCodes=config.CURRENCY)
model = Model(logging.getLogger("model"), dataset, currency)

dictionaryPath = os.path.join(os.path.dirname(__file__), "locale")
for language in config.LANGUAGES:
    registerDictionary(
        language,
        PhraseDictionary(
            logging.getLogger("dictionary"),
            os.path.join(dictionaryPath, "translation.{0}.xml".format(language))))
del dictionaryPath


@app.before_request
class TestDataset(unittest.TestCase): def setUpClass(): logging.basicConfig(level=logging.DEBUG) def setUp(self): self.logger = logging.getLogger() self.itemFile = Datafile('test.model.items.xml', self.id()) self.sessionFile = Datafile('test.model.session.xml', self.id()) self.currencyFile = Datafile('test.model.currency.xml', self.id()) self.dataset = Dataset(self.logger, './', self.sessionFile.getFilename(), self.itemFile.getFilename(), self.currencyFile.getFilename()) def tearDown(self): self.itemFile.clear() self.sessionFile.clear() self.currencyFile.clear() del self.dataset def test_restorePersist(self): self.dataset.restore() self.dataset.persist() self.dataset.restore() def test_sessionPairs(self): self.dataset.restore() sessionID = 1051183055 # read pairs pairs = self.dataset.getSessionPairs(sessionID) self.assertEqual( len(pairs), 2, 'Model.Session: Expected session 2 pairs, got: %(pairs)s.' % {'pairs': str(pairs)}) # update existing pair self.dataset.updateSessionPairs(sessionID, AddedItemCodes=sessionID) pairs = self.dataset.getSessionPairs(sessionID) self.assertTrue( 'AddedItemCodes' in pairs and pairs['AddedItemCodes'] == sessionID, 'Model.Session: Updated a pair ' 'AddedItemCodes' ' but got: %(pairs)s.' % {'pairs': str(pairs)}) # insert new pair self.dataset.updateSessionPairs(sessionID, NewPair='NewPair') pairs = self.dataset.getSessionPairs(sessionID) self.assertTrue( len(pairs) == 3 and 'NewPair' in pairs, 'Model.Session: Added a pair ' 'NewPair' ' but got: %(pairs)s.' % {'pairs': str(pairs)}) # delete a pair self.dataset.updateSessionPairs(sessionID, CreatedTimestamp=None) pairs = self.dataset.getSessionPairs(sessionID) self.assertTrue( len(pairs) == 2 and 'CreatedTimestamp' not in pairs, 'Model.Session: Deleted a pair ' 'CreatedTimestamp' ' but got: %(pairs)s.' % {'pairs': str(pairs)}) def test_sessionValues(self): self.dataset.restore() sessionID = 1051183055 # read existing value value = self.dataset.getSessionValue(sessionID, 'CreatedTimestamp') self.assertEqual( value, '2014-02-16 14:27:16.460836', 'Model.Session.Value: Expected value ' '2014-02-16 14:27:16.460836' ' of a key ' 'CreatedTimestamp' ', got: %(value)s.' % {'value': str(value)}) # read non-existing value value = self.dataset.getSessionValue(sessionID, 'NotExisting') self.assertEqual( value, None, 'Model.Session.Value: Reading non-existing key ' 'NotExisting' ' returned a value: %(value)s.' % {'value': str(value)}) def test_getItems(self): self.dataset.restore() # get all items items = self.dataset.getItems(None) self.assertEqual(len(items), 30) # add a new item code = self.dataset.getNextItemCode() self.assertTrue( self.dataset.addItem(code=code, owner=34, title='Clever Bull', author='Redfox', medium='Pencil', state='OTHER', initialAmount='12.3', charity='43', note=None, importNumber=None)) # get new item and verify data types item = self.dataset.getItem(code) self.assertTrue(isinstance(item[ItemField.OWNER], int)) self.assertTrue(isinstance(item[ItemField.CHARITY], int)) self.assertTrue(isinstance(item[ItemField.INITIAL_AMOUNT], Decimal)) self.assertTrue(isinstance(item[ItemField.TITLE], str)) self.assertTrue(isinstance(item[ItemField.AUTHOR], str)) self.assertTrue(isinstance(item[ItemField.STATE], str)) self.assertTrue(isinstance(item[ItemField.CODE], str)) self.assertTrue(isinstance(item[ItemField.MEDIUM], str)) self.assertIsNone(item[ItemField.NOTE]) self.assertIsNone(item[ItemField.IMPORT_NUMBER]) # get all items again and see whether we have added just one item # i.e. 
the reserved item is not present
        items = self.dataset.getItems(None)
        self.assertEqual(len(items), 31)

    def test_getUpdateItem(self):
        self.dataset.restore()

        # Get existing item
        item = self.dataset.getItem('A3')
        self.assertNotEqual(item, None)

        # Update title
        newTitle = 'ABCDEFGH'
        item[ItemField.TITLE] = newTitle
        self.assertTrue(self.dataset.updateItem('A3', **item))
        items = self.dataset.getItems('Title=="{0}"'.format(newTitle))
        self.assertEqual(len(items), 1)
        self.assertEqual(items[0][ItemField.CODE], 'A3')

        # Update buyer
        newBuyer = 9999
        self.assertTrue(
            self.dataset.updateItem('A3', **{ItemField.BUYER: newBuyer}))
        items = self.dataset.getItems('Buyer=="{0}"'.format(newBuyer))
        self.assertEqual(len(items), 1)
        self.assertEqual(items[0][ItemField.CODE], 'A3')

    def test_getNextItemCode(self):
        self.dataset.restore()

        # Create new top based on dataset
        code = self.dataset.getNextItemCode()
        self.assertEqual(code, '57')

        # Advance
        code = self.dataset.getNextItemCode()
        self.assertEqual(code, '58')
        code = self.dataset.getNextItemCode()
        self.assertEqual(code, '59')

        # Jump
        code = self.dataset.getNextItemCode(100)
        self.assertEqual(code, '100')
        code = self.dataset.getNextItemCode()
        self.assertEqual(code, '101')

        # Jumping backward is not allowed
        code = self.dataset.getNextItemCode(50)
        self.assertEqual(code, '102')
        code = self.dataset.getNextItemCode(102)
        self.assertEqual(code, '103')

        # Requesting a suggested code should fail if it is not possible to fulfill
        # the request without updating the counter.
        code = self.dataset.getNextItemCode(102, True)
        self.assertIsNone(code)
        code = self.dataset.getNextItemCode(102)
        self.assertEqual(code, '104')

    def test_normalizeItemImport(self):
        # Item not for sale.
        result, item = self.dataset.normalizeItemImport({
            ImportedItemField.NUMBER: '1',
            ImportedItemField.OWNER: '23',
            ImportedItemField.AUTHOR: 'Wolf',
            ImportedItemField.TITLE: 'Trees',
            ImportedItemField.MEDIUM: '',
            ImportedItemField.NOTE: 'Note',
            ImportedItemField.INITIAL_AMOUNT: '',
            ImportedItemField.CHARITY: ''})
        self.assertEqual(result, Result.SUCCESS)
        self.assertDictEqual(item, {
            ImportedItemField.NUMBER: 1,
            ImportedItemField.OWNER: 23,
            ImportedItemField.AUTHOR: 'Wolf',
            ImportedItemField.TITLE: 'Trees',
            ImportedItemField.MEDIUM: None,
            ImportedItemField.NOTE: 'Note',
            ImportedItemField.INITIAL_AMOUNT: None,
            ImportedItemField.CHARITY: None})

        # Item for sale.
        result, item = self.dataset.normalizeItemImport({
            ImportedItemField.NUMBER: '',
            ImportedItemField.OWNER: '',
            ImportedItemField.AUTHOR: 'Wolf',
            ImportedItemField.TITLE: 'Trees',
            ImportedItemField.MEDIUM: 'Pencils',
            ImportedItemField.NOTE: 'Note',
            ImportedItemField.INITIAL_AMOUNT: '23.50',
            ImportedItemField.CHARITY: '100'})
        self.assertEqual(result, Result.SUCCESS)
        self.assertDictEqual(item, {
            ImportedItemField.NUMBER: None,
            ImportedItemField.OWNER: None,
            ImportedItemField.AUTHOR: 'Wolf',
            ImportedItemField.TITLE: 'Trees',
            ImportedItemField.MEDIUM: 'Pencils',
            ImportedItemField.NOTE: 'Note',
            ImportedItemField.INITIAL_AMOUNT: '23.50',
            ImportedItemField.CHARITY: 100})

        # Invalid amount
        result, item = self.dataset.normalizeItemImport({
            ImportedItemField.NUMBER: '',
            ImportedItemField.OWNER: '23',
            ImportedItemField.AUTHOR: 'Wolf',
            ImportedItemField.TITLE: 'Trees',
            ImportedItemField.MEDIUM: '',
            ImportedItemField.NOTE: 'Note',
            ImportedItemField.INITIAL_AMOUNT: '23.M',
            ImportedItemField.CHARITY: '100'})
        self.assertEqual(result, Result.INVALID_AMOUNT)

        # Invalid charity
        result, item = self.dataset.normalizeItemImport({
            ImportedItemField.NUMBER: '',
            ImportedItemField.OWNER: '23',
            ImportedItemField.AUTHOR: 'Wolf',
            ImportedItemField.TITLE: 'Trees',
            ImportedItemField.MEDIUM: 'Pencil',
            ImportedItemField.NOTE: 'Note',
            ImportedItemField.INITIAL_AMOUNT: '23.5',
            ImportedItemField.CHARITY: 'X'})
        self.assertEqual(result, Result.INVALID_CHARITY)

        # Invalid owner
        result, item = self.dataset.normalizeItemImport({
            ImportedItemField.NUMBER: '',
            ImportedItemField.OWNER: 'DX',
            ImportedItemField.AUTHOR: 'Wolf',
            ImportedItemField.TITLE: 'Trees',
            ImportedItemField.MEDIUM: 'Pencil',
            ImportedItemField.NOTE: 'Note',
            ImportedItemField.INITIAL_AMOUNT: '23.5',
            ImportedItemField.CHARITY: '100'})
        self.assertEqual(result, Result.INVALID_ITEM_OWNER)

        # Invalid number
        result, item = self.dataset.normalizeItemImport({
            ImportedItemField.NUMBER: '??',
            ImportedItemField.OWNER: '',
            ImportedItemField.AUTHOR: 'Wolf',
            ImportedItemField.TITLE: 'Trees',
            ImportedItemField.MEDIUM: 'Pencil',
            ImportedItemField.NOTE: 'Note',
            ImportedItemField.INITIAL_AMOUNT: '23.5',
            ImportedItemField.CHARITY: '100'})
        self.assertEqual(result, Result.INVALID_ITEM_NUMBER)

    def test_getCurrencyInfo(self):
        self.dataset.restore()

        # Guarantee that the result matches the order of the input
        currencyInfoList = self.dataset.getCurrencyInfo(['czk', 'eur', 'usd'])
        self.assertListEqual(
            ['czk', 'eur', 'usd'],
            [currencyInfo[CurrencyField.CODE] for currencyInfo in currencyInfoList])
        currencyInfoList = self.dataset.getCurrencyInfo(['usd', 'eur', 'czk'])
        self.assertListEqual(
            ['usd', 'eur', 'czk'],
            [currencyInfo[CurrencyField.CODE] for currencyInfo in currencyInfoList])

        # Missing currency
        currencyInfoList = self.dataset.getCurrencyInfo(['usd', 'eur', 'xxx'])
        self.assertListEqual(
            ['usd', 'eur', 'xxx'],
            [currencyInfo[CurrencyField.CODE] for currencyInfo in currencyInfoList])

    def test_updateCurrencyInfo(self):
        self.dataset.restore()

        # Update with valid data and various ways of writing the amount
        self.assertEqual(
            Result.SUCCESS,
            self.dataset.updateCurrencyInfo([
                {CurrencyField.CODE: 'czk',
                 CurrencyField.AMOUNT_IN_PRIMARY: '1.23'},
                {CurrencyField.CODE: 'eur',
                 CurrencyField.AMOUNT_IN_PRIMARY: 4.56}]))
        currencyInfoList = self.dataset.getCurrencyInfo(['czk', 'eur', 'usd'])
        self.assertListEqual(
            [Decimal('1.23'), Decimal(4.56), Decimal('19.71')],
            [currencyInfo[CurrencyField.AMOUNT_IN_PRIMARY] for currencyInfo in currencyInfoList])

        # Update with invalid data
        self.assertEqual(
            Result.INPUT_ERROR,
            self.dataset.updateCurrencyInfo([{CurrencyField.CODE: 'czk'}]))
        self.assertEqual(
            Result.INPUT_ERROR,
            self.dataset.updateCurrencyInfo([{CurrencyField.AMOUNT_IN_PRIMARY: 4.56}]))
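
# The assertions above pin down updateCurrencyInfo's contract: every entry
# must carry both CODE and AMOUNT_IN_PRIMARY, and the amount may arrive as a
# string or a float. A hypothetical validation helper consistent with those
# tests (Decimal accepts both strings and floats directly, so Decimal('1.23')
# and Decimal(4.56) come out exactly as the test expects); the project's real
# implementation may differ:
from decimal import Decimal, InvalidOperation

def normalizeCurrencyEntry(entry):
    """Return (code, amount as Decimal), or None to signal Result.INPUT_ERROR."""
    if CurrencyField.CODE not in entry or CurrencyField.AMOUNT_IN_PRIMARY not in entry:
        return None
    try:
        return entry[CurrencyField.CODE], Decimal(entry[CurrencyField.AMOUNT_IN_PRIMARY])
    except (InvalidOperation, ValueError, TypeError):
        return None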
def main(argv=None):
    # Configurations
    config = Config(gpu='1',
                    root_dir='./data/train/',
                    root_dir_val='./data/val/',
                    mode='training')

    # Create data feeding pipeline.
    dataset_train = Dataset(config, 'train')
    dataset_val = Dataset(config, 'val')

    # Train Graph
    losses, g_op, d_op, fig = _step(config, dataset_train, training_nn=True)
    losses_val, _, _, fig_val = _step(config, dataset_val, training_nn=False)

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver(max_to_keep=50)
    with tf.Session(config=config.GPU_CONFIG) as sess:
        # Restore the model
        ckpt = tf.train.get_checkpoint_state(config.LOG_DIR)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            last_epoch = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            print('**********************************************************')
            print('Restore from Epoch ' + str(last_epoch))
            print('**********************************************************')
        else:
            init = tf.initializers.global_variables()
            last_epoch = 0
            sess.run(init)
            print('**********************************************************')
            print('Train from scratch.')
            print('**********************************************************')

        avg_loss = Error()
        print_list = {}
        for epoch in range(int(last_epoch), config.MAX_EPOCH):
            start = time.time()

            # Train one epoch
            for step in range(config.STEPS_PER_EPOCH):
                if step % config.G_D_RATIO == 0:
                    _losses = sess.run(losses + [g_op, d_op, fig])
                else:
                    _losses = sess.run(losses + [g_op, fig])

                # Logging
                print_list['g_loss'] = _losses[0]
                print_list['d_loss'] = _losses[1]
                print_list['a_loss'] = _losses[2]
                display_list = ['Epoch ' + str(epoch + 1) + '-' + str(step + 1) + '/' +
                                str(config.STEPS_PER_EPOCH) + ':'] + \
                               [avg_loss(x) for x in print_list.items()]
                print(*display_list + [' '], end='\r')

                # Visualization
                if step % config.LOG_FR_TRAIN == 0:
                    fname = config.LOG_DIR + '/Epoch-' + str(epoch + 1) + '-' + str(step + 1) + '.png'
                    cv2.imwrite(fname, _losses[-1])

            # Model saving
            saver.save(sess, config.LOG_DIR + '/ckpt', global_step=epoch + 1)
            print('\n', end='\r')

            # Validate one epoch
            for step in range(config.STEPS_PER_EPOCH_VAL):
                _losses = sess.run(losses_val + [fig_val])

                # Logging
                print_list['g_loss'] = _losses[0]
                print_list['d_loss'] = _losses[1]
                print_list['a_loss'] = _losses[2]
                display_list = ['Epoch ' + str(epoch + 1) + '-Val-' + str(step + 1) + '/' +
                                str(config.STEPS_PER_EPOCH_VAL) + ':'] + \
                               [avg_loss(x, val=1) for x in print_list.items()]
                print(*display_list + [' '], end='\r')

                # Visualization
                if step % config.LOG_FR_TEST == 0:
                    fname = config.LOG_DIR + '/Epoch-' + str(epoch + 1) + '-Val-' + str(step + 1) + '.png'
                    cv2.imwrite(fname, _losses[-1])

            # Time of one epoch
            print('\n Time taken for epoch {} is {:3g} sec'.format(
                epoch + 1, time.time() - start))
            avg_loss.reset()
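
# `Error` (instantiated as avg_loss above) is not defined in this snippet.
# A minimal sketch of the behaviour the loop relies on -- a running mean per
# loss name, rendered as a "name value" string for the progress line, with
# separate accumulators for training and validation (val=0/1). The actual
# class may differ:
class Error:
    def __init__(self):
        self.reset()

    def reset(self):
        # One accumulator pair per (val, loss-name) combination.
        self.totals = {0: {}, 1: {}}
        self.counts = {0: {}, 1: {}}

    def __call__(self, item, val=0):
        name, value = item  # item is a (name, value) pair from print_list.items()
        self.totals[val][name] = self.totals[val].get(name, 0.0) + float(value)
        self.counts[val][name] = self.counts[val].get(name, 0) + 1
        return '{} {:.4f}'.format(name, self.totals[val][name] / self.counts[val][name])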
class Trainer(object):
    def __init__(self, dataset):
        self.data = None
        self.model = None
        self.hyper = {"dataset": dataset}
        self.log = {}

    def __repr__(self):
        text = ""
        for key, value in self.log.items():
            text += "{}:\t".format(key)
            for error in value[0]:
                text += "{0:.4f} ".format(float(error))
            text += "\n"
        return text

    def load_data(self, dataset=None, batch=128):
        self.data = Dataset(dataset=dataset, batch=batch)
        self.hyper["num_train"] = len(self.data.y["train"])
        self.hyper["num_val"] = len(self.data.y["valid"])
        self.hyper["num_test"] = len(self.data.y["test"])
        self.hyper["target_size"] = self.data.target_size
        self.hyper["molecule_size"] = self.data.molecule_size
        self.hyper["num_features"] = self.data.num_features
        self.hyper["task"] = self.data.task
        self.hyper["outputs"] = self.data.outputs
        self.hyper["batch"] = batch
        print("finish loading data with batch size", batch)

    def fit(self, model, epoch, batch=128, fold=10, pooling="max",
            units_conv=128, units_dense=128, num_layers=2,
            monitor="val_rmse", mode="min", use_multiprocessing=True,
            label="", *args, **kwargs):
        # 1. Generate CV folder
        now = datetime.now()
        base_path = "../../result/{}/{}/".format(model, self.hyper["dataset"])
        log_path = base_path
        results = []

        for i in range(1, fold + 1):
            start_time = time.time()

            # 2. Generate data
            self.load_data(dataset=self.hyper['dataset'], batch=batch)
            self.data.set_features(**kwargs)
            self.hyper["num_features"] = self.data.num_features

            # 3. Make model
            self.model = getattr(m, model)(*args, **self.hyper, **kwargs)
            self.model.summary()

            # 4. Callbacks
            log_path = base_path + "{}_c{}_d{}_l{}_p{}_{}{}/".format(
                batch, units_conv, units_dense, num_layers, pooling, label,
                now.strftime("%m%d%H"))
            tb_path = log_path + "trial_{}/".format(i)

            callbacks = []
            callbacks.append(Roc(self.data.generator("valid")))
            mode = "max"
            callbacks += [Tensorboard(log_dir=tb_path, write_graph=False,
                                      histogram_freq=0, write_images=True),
                          ModelCheckpoint(tb_path + "{epoch:01d}-{" + monitor + ":.3f}.hdf5",
                                          monitor=monitor, save_weights_only=True,
                                          save_best_only=True, period=1, mode=mode),
                          EarlyStopping(patience=15, restore_best_weights=True),  # 15, hiv=10
                          ReduceLROnPlateau(monitor="val_loss", factor=0.9,
                                            patience=10, min_lr=0.0005)]

            # 5. Fit
            self.model.fit_generator(self.data.generator("train"),
                                     epochs=epoch,
                                     validation_data=self.data.generator("valid"),
                                     callbacks=callbacks,
                                     use_multiprocessing=use_multiprocessing,
                                     workers=4)
            self.model.save_weights(tb_path + "best_weight.hdf5")
            self.hyper["train_time"] = time.time() - start_time

            # 6. Save train, valid, test losses
            losses = []
            for gen in [self.data.generator("train"),
                        self.data.generator("valid"),
                        self.data.generator("test")]:
                val_roc, val_pr = calculate_roc_pr(self.model, gen)
                losses.append(val_roc)
                losses.append(val_pr)
            results.append([losses[0], losses[2], losses[4],
                            losses[1], losses[3], losses[5]])

            # 7. Save hyper
            with open(tb_path + "hyper.csv", "w") as file:
                writer = csv.DictWriter(file, fieldnames=list(self.hyper.keys()))
                writer.writeheader()
                writer.writerow(self.hyper)

            # 8. Save data split and test results
            # for target in ["train", "valid", "test"]:
            #     pred = self.model.predict_generator(self.data.generator(target, task="input_only"),
            #                                         use_multiprocessing=use_multiprocessing, workers=10)
            #     self.data.save_dataset(tb_path, pred=pred, target=target)

        # 9. Save cross-validation results
        header = ["train_roc", "valid_roc", "test_roc", "train_pr", "valid_pr", "test_pr"]
        with open(log_path + "raw_results.csv", "w") as file:
            writer = csv.writer(file, delimiter=",")
            writer.writerow(header)
            for r in results:
                writer.writerow(r)

        results = np.array(results)
        results = [np.mean(results, axis=0), np.std(results, axis=0)]
        with open(log_path + "results.csv", "w") as csvfile:
            writer = csv.writer(csvfile, delimiter=",")
            writer.writerow(header)
            for r in results:
                writer.writerow(r)

        # Update cross-validation log
        self.log["{}_B{}_C{}_D{}_L{}_P{}".format(model, batch, units_conv,
                                                 units_dense, num_layers,
                                                 pooling)] = results
        print(self)
        print("Training Ended")
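
# Hypothetical usage of Trainer.fit -- the dataset name and model class name
# are placeholders for whatever Dataset and the model module `m` actually
# expose in this project:
# trainer = Trainer("bace")
# trainer.fit("GCN", epoch=100, batch=128, fold=10, pooling="max")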
def perform_lrp(model, hyper, trial=0, sample=None, epsilon=0.1, gamma=0.1):
    tf.config.experimental.set_memory_growth(
        tf.config.experimental.list_physical_devices('GPU')[0], True)

    # Make folder
    fig_path = "../analysis/{}".format(model)
    if not os.path.isdir(fig_path):
        os.mkdir(fig_path)
    fig_path = "../analysis/{}/{}".format(model, hyper)
    if not os.path.isdir(fig_path):
        os.mkdir(fig_path)
    fig_path = "../analysis/{}/{}/heatmap".format(model, hyper)
    if not os.path.isdir(fig_path):
        os.mkdir(fig_path)

    # Load results
    base_path = "../result/{}/{}/".format(model, hyper)
    path = base_path + 'trial_{:02d}/'.format(trial)

    # Load hyper
    with open(path + 'hyper.csv', newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            hyper = dict(row)

    # Load model
    custom_objects = {'NodeEmbedding': NodeEmbedding,
                      'GraphConvolution': GraphConvolution,
                      'Normalize': Normalize,
                      'GlobalPooling': GlobalPooling}
    model = load_model(path + 'best_model.h5', custom_objects=custom_objects)
    print([l.name for l in model.layers])

    # Load data
    data = np.load(path + 'data_split.npz')
    dataset = Dataset('refined', 5)
    if sample is not None:
        dataset.split_by_idx(32, data['train'], data['valid'], data['test'][sample])
    else:
        dataset.split_by_idx(32, data['train'], data['valid'], data['test'])
    data.close()

    # Predict
    true_y = dataset.test_y
    outputs = {}
    for layer_name in ['node_embedding', 'node_embedding_1', 'normalize',
                       'normalize_1', 'activation', 'add', 'activation_1',
                       'add_1', 'global_pooling', 'activation_2',
                       'activation_3', 'activation_4', 'atom_feature_input']:
        sub_model = tf.keras.models.Model(inputs=model.input,
                                          outputs=model.get_layer(layer_name).output)
        outputs[layer_name] = sub_model.predict(dataset.test,
                                                steps=dataset.test_step,
                                                verbose=0)[:len(true_y)]

    # Output layer: LRP-0
    # print('Calculating Dense_2...')
    relevance = lrp_dense(outputs['activation_3'],
                          outputs['activation_4'],
                          model.get_layer('dense_2').get_weights()[0],
                          model.get_layer('dense_2').get_weights()[1],
                          epsilon=0)

    # Dense layer: LRP-e
    # print('Calculating Dense_1...')
    relevance = lrp_dense(outputs['activation_2'], relevance,
                          model.get_layer('dense_1').get_weights()[0],
                          model.get_layer('dense_1').get_weights()[1],
                          epsilon=epsilon)

    # Dense layer: LRP-e
    # print('Calculating Dense_0...')
    relevance = lrp_dense(outputs['global_pooling'], relevance,
                          model.get_layer('dense').get_weights()[0],
                          model.get_layer('dense').get_weights()[1],
                          epsilon=epsilon)

    # Pooling layer
    # print('Calculating Pooling...')
    relevance = lrp_pooling(outputs['activation_1'], relevance)

    # Add layer
    # print('Calculating Add_1...')
    relevance_1, relevance_2 = lrp_add(
        [outputs['add'], outputs['activation_1']], relevance)

    # GCN layer: LRP-g
    # print('Calculating GCN_1...')
    relevance = lrp_gcn_gamma(
        outputs['add'], relevance_2, outputs['normalize_1'],
        model.get_layer('graph_convolution_1').get_weights()[0],
        gamma=gamma) + relevance_1

    # Add layer
    # print('Calculating Add_0...')
    # The original referenced outputs['graph_embedding'] / 'graph_embedding_1',
    # which are never collected above; the 'node_embedding' tensors (Keras's
    # default names for the NodeEmbedding layers) are assumed to be intended.
    relevance_1, relevance_2 = lrp_add(
        [outputs['node_embedding_1'], outputs['activation']], relevance)

    # GCN layer: LRP-g
    # print('Calculating GCN_0...')
    relevance = lrp_gcn_gamma(
        outputs['node_embedding_1'], relevance_2, outputs['normalize'],
        model.get_layer('graph_convolution').get_weights()[0],
        gamma=gamma) + relevance_1

    # Embedding layer: LRP-e
    # print('Calculating Embedding_1...')
    relevance = lrp_dense(outputs['node_embedding'], relevance,
                          model.get_layer('node_embedding_1').get_weights()[0],
                          model.get_layer('node_embedding_1').get_weights()[1],
                          epsilon=epsilon)

    # Embedding layer: LRP-e
    # print('Calculating Embedding_0...')
    relevance = lrp_dense(outputs['atom_feature_input'], relevance,
                          model.get_layer('node_embedding').get_weights()[0],
                          model.get_layer('node_embedding').get_weights()[1],
                          epsilon=epsilon)

    relevance = tf.math.reduce_sum(relevance, axis=-1).numpy()
    relevance = np.divide(relevance, np.expand_dims(true_y, -1))

    # Preset
    DrawingOptions.bondLineWidth = 1.5
    DrawingOptions.elemDict = {}
    DrawingOptions.dotsPerAngstrom = 20
    DrawingOptions.atomLabelFontSize = 4
    DrawingOptions.atomLabelMinFontSize = 4
    DrawingOptions.dblBondOffset = 0.3
    DrawingOptions.wedgeDashedBonds = False

    # Load data
    dataframe = pd.read_pickle('../data/5A.pkl')
    if sample is not None:
        test_set = np.load(path + 'data_split.npz')['test'][sample]
    else:
        test_set = np.load(path + 'data_split.npz')['test']

    # Draw images for test molecules
    colormap = cm.get_cmap('seismic')
    for idx, test_idx in enumerate(test_set):
        print('Drawing figure for {}/{}'.format(idx, len(test_set)))
        pdb_code = dataframe.iloc[test_idx]['code']
        error = np.absolute(
            dataframe.iloc[test_idx]['output'] - outputs['activation_4'][idx])[0]
        if error > 0.2:
            continue

        for mol_ligand, mol_pocket in zip(
                Chem.SDMolSupplier('../data/refined-set/{}/{}_ligand.sdf'.format(pdb_code, pdb_code)),
                Chem.SDMolSupplier('../data/refined-set/{}/{}_pocket.sdf'.format(pdb_code, pdb_code))):
            # Crop atoms
            mol = Chem.CombineMols(mol_ligand, mol_pocket)
            distance = np.array(rdmolops.Get3DDistanceMatrix(mol))
            cropped_idx = np.argwhere(
                np.min(distance[:, :mol_ligand.GetNumAtoms()], axis=1) <= 5).flatten()

            unpadded_relevance = np.zeros((mol.GetNumAtoms(),))
            np.put(unpadded_relevance, cropped_idx, relevance[idx])
            scale = max(max(unpadded_relevance),
                        math.fabs(min(unpadded_relevance))) * 3

            # Separate fragments in the combined Mol
            idxs_frag = rdmolops.GetMolFrags(mol)
            mols_frag = rdmolops.GetMolFrags(mol, asMols=True)

            # Draw fragment and interaction
            for i, (mol_frag, idx_frag) in enumerate(zip(mols_frag[1:], idxs_frag[1:])):
                # Ignore water
                if mol_frag.GetNumAtoms() == 1:
                    continue

                # Generate 2D image
                mol_combined = Chem.CombineMols(mols_frag[0], mol_frag)
                AllChem.Compute2DCoords(mol_combined)
                fig = Draw.MolToMPL(mol_combined, coordScale=1)
                fig.axes[0].set_axis_off()

                # Draw a line between close atoms (5A)
                flag = False
                for j in range(mol_ligand.GetNumAtoms()):
                    for k in idx_frag:
                        if distance[j, k] <= 5:
                            # Draw connection
                            coord_li = mol_combined._atomPs[j]
                            coord_po = mol_combined._atomPs[
                                idx_frag.index(k) + mols_frag[0].GetNumAtoms()]
                            x, y = np.array([[coord_li[0], coord_po[0]],
                                             [coord_li[1], coord_po[1]]])
                            line = Line2D(x, y, color='b', linewidth=1, alpha=0.3)
                            fig.axes[0].add_line(line)
                            flag = True

                # Draw heatmap for atoms
                for j in range(mol_combined.GetNumAtoms()):
                    relevance_li = unpadded_relevance[j]
                    relevance_li = relevance_li / scale + 0.5
                    highlight = plt.Circle(
                        (mol_combined._atomPs[j][0], mol_combined._atomPs[j][1]),
                        0.035 * math.fabs(unpadded_relevance[j] / scale) + 0.008,
                        color=colormap(relevance_li), alpha=0.8, zorder=0)
                    fig.axes[0].add_artist(highlight)

                # Save
                if flag:
                    fig_name = fig_path + '/{}_lrp_{}_{}_{}.png'.format(
                        trial, test_idx, pdb_code, i)
                    fig.savefig(fig_name, bbox_inches='tight')
                plt.close(fig)
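
# The lrp_* helpers above are imported from elsewhere. Assuming lrp_dense
# follows the standard LRP-epsilon rule (with epsilon=0 degenerating to
# LRP-0, as used for the output layer), a NumPy sketch of what such a helper
# computes -- not the project's actual implementation:
import numpy as np

def lrp_dense_epsilon(a, r_out, w, b, epsilon=0.1):
    """LRP-epsilon for a dense layer.

    a     : activations entering the layer, shape (batch, n_in)
    r_out : relevance of the layer outputs, shape (batch, n_out)
    w, b  : kernel (n_in, n_out) and bias (n_out,)
    """
    z = a @ w + b                                  # forward pre-activations
    z = z + epsilon * np.where(z >= 0, 1.0, -1.0)  # epsilon stabilizer
    s = r_out / z                                  # relevance per unit of activation
    return a * (s @ w.T)                           # R_i = a_i * sum_j w_ij * s_j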
def test_dataset(path, dataset=1, scenario=1, n_runs=3, n_subsets=3, k=3, c=1, pca=0):
    # Variables
    data = Dataset()
    runs_performance = {}

    # File variables
    wb = load_workbook(path)
    ws = wb.active
    row = ws.max_row + 1
    ws.title = 'Test Results'

    # Select test data
    data.choose_data(dataset)
    # print(data.database_selected_str, "data loaded.")

    # Pre-process data
    data.scenario_pre_processing(scenario)
    # print("Finished pre-processing data for", data.scenario_selected_str)

    # Apply Kruskal-Wallis
    data.set_feature_selection_method(2)
    data.dataset = kruskal_wallis(data.dataset, len(data.dataset['label']))
    # print("Finished applying kruskal-wallis feature selection method.")

    # Apply correlation redundancy measure
    data.dataset, unused_label = redundancy_measure(data.dataset)
    # print("Correlation redundancy measure applied.")
    # print("Beginning tests...this might take a while")

    if pca == 1:
        data.dataset = run_pca(data.dataset, len(data.dataset['label']))

    # For all 5 classifiers
    for classifier in range(1, 6):
        # Variable to hold all runs for all classifiers
        runs_performance[classifier] = {}
        if classifier == 5:
            n_runs = int(n_runs / 5)
            n_subsets = 3

        # Run "n_runs" tests
        for run in range(0, n_runs):
            # Structure to hold results of classification
            performance = {'fp': 0, 'fn': 0, 'tp': 0, 'tn': 0,
                           'accuracy': 0,
                           'avg_misclassification': 0,
                           'misclassification_per_fold': [],
                           'avg_misclassification_per_fold': [],
                           'sensitivity': 0,
                           'specificity': 0}
            print("run %s for classifier %s" % (str(run), str(classifier)))

            # Create dict to save run results
            runs_performance[classifier][run] = {}

            # Apply K-fold: splitting the dataset
            kf = KFold(n_splits=n_subsets, shuffle=True)

            # K-fold executions
            for idx_train, idx_test in kf.split(data.dataset["data"], data.dataset["target"]):
                # Classification prediction
                prediction = []

                # Prepare data for training
                x_train = [data.dataset["data"][idx] for idx in idx_train]
                x_train = np.asarray(x_train).astype(np.float64)
                y_train = [data.dataset["target"][idx] for idx in idx_train]

                # Prepare data for testing
                x_test = [data.dataset["data"][idx] for idx in idx_test]
                x_test = np.asarray(x_test).astype(np.float64)
                y_test = [data.dataset["target"][idx] for idx in idx_test]

                # Minimum distance classifier (MDC)
                if classifier == 1:
                    prediction = minimum_distance_classifier(x_train, y_train, x_test, y_test)
                # Fisher Discriminant Analysis (Fisher LDA)
                elif classifier == 2:
                    prediction = fisher_discriminant_analisys(x_train, y_train, x_test, y_test)
                # K-Nearest Neighbors (KNN)
                elif classifier == 3:
                    prediction = k_nearest_neighbors(x_train, y_train, x_test, y_test, k)
                # Bayes classifier
                elif classifier == 4:
                    prediction = bayes_classifier(x_train, y_train, x_test, y_test)
                # Support Vector Machines
                elif classifier == 5:
                    prediction = support_vector_machines(x_train, y_train, x_test, y_test, c)

                # Performance measurement
                performance = performance_measurement(y_test, prediction, scenario, performance)

            # Calculate averages
            performance['avg_misclassification'] /= n_subsets
            performance['sensitivity'] /= n_subsets
            performance['specificity'] /= n_subsets
            performance['accuracy'] /= n_subsets

            # Set layout
            set_layout(ws, scenario)

            # Add values into the sheet
            ws.cell(column=1, row=row, value=dataset)
            ws.cell(column=2, row=row, value=run)
            ws.cell(column=3, row=row, value=classifier)
            set_values(ws, scenario, performance, row)
            row += 1

            # Save performance measurement per run
            runs_performance[classifier][run]["performance"] = performance
            runs_performance[classifier][run]["scenario"] = scenario

    # For debug
    # for classifier in runs_performance:
    #     for run in runs_performance[classifier]:
    #         print("Classifier ", classifier, " run", run)
    #         print(runs_performance[classifier][run])

    return wb
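
# Example invocation; the workbook at `path` must already exist because
# test_dataset opens it with load_workbook (the file name here is a
# placeholder):
# wb = test_dataset('test_results.xlsx', dataset=1, scenario=1, n_runs=3)
# wb.save('test_results.xlsx')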
class TestModel(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        logging.basicConfig(level=logging.DEBUG)

    def setUp(self):
        self.logger = logging.getLogger()
        self.testFiles = []
        self.itemFile = Datafile('test.model.items.xml', self.id())
        self.itemFileAuctionOnly = Datafile('test.model.items.auction_only.xml', self.id())
        self.sessionFile = Datafile('test.model.session.xml', self.id())
        self.currencyFile = Datafile('test.model.currency.xml', self.id())
        self.importFileCsv = Datafile('test.model.import.csv', self.id())
        self.importFileTxt = Datafile('test.model.import.txt', self.id())
        self.dataset = Dataset(
            self.logger, './',
            self.sessionFile.getFilename(),
            self.itemFile.getFilename(),
            self.currencyFile.getFilename())
        self.dataset.restore()
        self.currency = Currency(
            self.logger, self.dataset, currencyCodes=['czk', 'eur'])
        self.model = Model(
            self.logger, self.dataset, self.currency)

    def tearDown(self):
        self.itemFile.clear()
        self.sessionFile.clear()
        self.currencyFile.clear()
        self.importFileCsv.clear()
        self.importFileTxt.clear()
        for file in self.testFiles:
            file.clear()
        del self.model
        del self.currency
        del self.dataset

    def restoreTestFile(self, filename):
        testFile = Datafile(filename, self.id())
        self.testFiles.append(testFile)
        return testFile

    def test_getItem(self):
        item = self.model.getItem('A2')
        self.assertDictContainsSubset({ItemField.CODE: 'A2'}, item)
        self.assertListEqual(
            [Decimal('250'), Decimal('9.21')],
            [currency[CurrencyField.AMOUNT]
             for currency in item[ItemField.INITIAL_AMOUNT_IN_CURRENCY]])
        self.assertListEqual(
            [Decimal('300'), Decimal('11.06')],
            [currency[CurrencyField.AMOUNT]
             for currency in item[ItemField.AMOUNT_IN_CURRENCY]])
        self.assertListEqual(
            [],
            [currency[CurrencyField.AMOUNT]
             for currency in item[ItemField.AMOUNT_IN_AUCTION_IN_CURRENCY]])

    def test_addNewItem(self):
        sessionID = 11111

        # add (on show)
        self.assertEqual(
            self.model.addNewItem(sessionID, 23, 'Mysteria', 'Wolf', 'Pastel', None, None, None),
            Result.SUCCESS)
        addedItem = self.dataset.getItems('Owner=="23" and Title=="Mysteria" and Author=="Wolf"')[0]
        self.assertDictContainsSubset({
            ItemField.STATE: ItemState.ON_SHOW,
            ItemField.MEDIUM: 'Pastel',
            ItemField.NOTE: None}, addedItem)

        # duplicate add
        self.assertEqual(
            self.model.addNewItem(sessionID, 23, 'Mysteria', 'Wolf', None, None, None, None),
            Result.DUPLICATE_ITEM)

        # add (on sale) (amount/charity is converted but search expression assumes strings)
        self.assertEqual(
            self.model.addNewItem(sessionID, 35, 'Mysteria', 'Tiger', '', 123.5, 10, 'Good Stuff'),
            Result.SUCCESS)
        addedItem = self.dataset.getItems(
            'Owner=="35" and Title=="Mysteria" and Author=="Tiger" and Charity=="10" and InitialAmount=="123.5"')[0]
        self.assertDictContainsSubset({
            ItemField.INITIAL_AMOUNT: 123.5,
            ItemField.CHARITY: 10,
            ItemField.STATE: ItemState.ON_SALE,
            ItemField.MEDIUM: None,
            ItemField.NOTE: 'Good Stuff'}, addedItem)

        # add (quotes)
        self.assertEqual(
            self.model.addNewItem(sessionID, 98, 'Quotted"Title', 'Qu"es', 'Photo', None, None, 'Do not touch.'),
            Result.SUCCESS)

        # add (empty parameters)
        self.assertEqual(
            self.model.addNewItem(sessionID, 99, 'Strong', 'Lemur', None, None, None, ''),
            Result.SUCCESS)
        addedItem = self.dataset.getItems('Owner=="99" and Title=="Strong" and Author=="Lemur"')[0]
        self.assertDictContainsSubset({
            ItemField.MEDIUM: None,
            ItemField.NOTE: None}, addedItem)

        # add item from an import
        importNumber = 100
        self.assertEqual(
            self.model.addNewItem(sessionID, 99, 'Shy', 'Lemur', None, None, None, '', importNumber),
            Result.SUCCESS)
        addedItem = self.dataset.getItems('Owner=="99" and Title=="Shy" and Author=="Lemur"')[0]
        self.assertDictContainsSubset({
            ItemField.CODE: str(importNumber),
            ItemField.IMPORT_NUMBER: importNumber}, addedItem)

        # add updated item (differs in amount/charity)
        self.assertEqual(
            self.model.addNewItem(sessionID, 99, 'Shy', 'Lemur', None, '12.5', 100, 'Some note', importNumber),
            Result.DUPLICATE_IMPORT_NUMBER)

        # add updated item (differs in name)
        self.assertEqual(
            self.model.addNewItem(sessionID, 99, 'Smiling', 'Lemur', None, None, None, 'Some note', importNumber),
            Result.DUPLICATE_IMPORT_NUMBER)

        # add item from an import with an import number that matches an existing code
        importNumber = 3
        self.assertEqual(len(self.dataset.getItems('Code=="{0}"'.format(importNumber))), 0)
        self.assertEqual(
            self.model.addNewItem(sessionID, 99, 'Funny', 'Cat', None, None, None, '', importNumber),
            Result.SUCCESS_BUT_IMPORT_RENUMBERED)
        addedItem = self.dataset.getItems('Owner=="99" and Title=="Funny" and Author=="Cat"')[0]
        self.assertDictContainsSubset({
            ItemField.IMPORT_NUMBER: importNumber}, addedItem)

        # added list
        addedItemCodes = self.model.getAdded(sessionID)
        self.assertEqual(len(addedItemCodes), 6)

    def test_getAddedItems(self):
        sessionID = 11111

        # add items
        self.assertEqual(
            self.model.addNewItem(sessionID, 23, 'Mysteria', 'Wolf', 'Oil', None, None, None),
            Result.SUCCESS)
        self.assertEqual(
            self.model.addNewItem(sessionID, 35, 'Mysteria', 'Tiger', 'Pencil', '123', '10', None),
            Result.SUCCESS)

        # get added items
        addedItems = self.model.getAddedItems(sessionID)
        self.assertEqual(len(addedItems), 2)
        item = [item for item in addedItems if item[ItemField.OWNER] == 23][0]
        self.assertListEqual(
            [],
            [currencyAmount[CurrencyField.AMOUNT]
             for currencyAmount in item[ItemField.INITIAL_AMOUNT_IN_CURRENCY]])
        item = [item for item in addedItems if item[ItemField.OWNER] == 35][0]
        self.assertListEqual(
            [Decimal('123'), Decimal('4.53')],
            [currencyAmount[CurrencyField.AMOUNT]
             for currencyAmount in item[ItemField.INITIAL_AMOUNT_IN_CURRENCY]])

    def test_updateItem(self):
        # update item
        self.assertEqual(
            self.model.updateItem(56,
                                  owner=1, title='Wolf', author='Greenwolf',
                                  medium='Color Pencils', state=ItemState.ON_SALE,
                                  initialAmount='105', charity='50',
                                  amount=None, buyer=None, note=None),
            Result.SUCCESS)
        updatedItem = self.dataset.getItems(
            'Owner=="1" and Title=="Wolf" and Author=="Greenwolf" and Medium=="Color Pencils"')[0]
        self.assertDictContainsSubset({
            ItemField.STATE: ItemState.ON_SALE,
            ItemField.INITIAL_AMOUNT: 105,
            ItemField.CHARITY: 50,
            ItemField.AMOUNT: None,
            ItemField.NOTE: None}, updatedItem)
        self.assertIsNone(updatedItem[ItemField.AMOUNT])
        self.assertIsNone(updatedItem[ItemField.BUYER])

        # update item (range error of charity)
        self.assertEqual(
            self.model.updateItem(56,
                                  owner=1, title='Wolf', author='Greenwolf',
                                  medium='Color Pencils', state=ItemState.FINISHED,
                                  initialAmount='105', charity='150',
                                  amount='200', buyer='20', note=None),
            Result.INVALID_VALUE)

        # update item (consistency error)
        self.assertEqual(
            self.model.updateItem(56,
                                  owner=1, title='Wolf', author='Greenwolf',
                                  medium='Color Pencils', state=ItemState.FINISHED,
                                  initialAmount='105', charity='10',
                                  amount=None, buyer=None, note=None),
            Result.AMOUNT_NOT_DEFINED)

    def test_deleteItems(self):
        # 1. Delete items
        self.assertEqual(self.model.deleteItems(['A11', 'A2', 'A999']), 2)
        self.assertIsNone(self.model.getItem('A11'))
        self.assertIsNone(self.model.getItem('A2'))
        self.assertIsNone(self.model.getItem('A999'))

    def test_getItemNetAmount(self):
        item = self.model.getItem('A2')
        amountNet, amountCharity = self.model.getItemNetAmount(item)
        self.assertEqual(amountNet, Decimal('270'))
        self.assertEqual(amountCharity, Decimal('30'))

    def test_getPotentialCharityAmount(self):
        charityAmount = self.model.getPotentialCharityAmount()
        self.assertEqual(charityAmount, Decimal('299'))

    def test_getBadgeReconciliationSummary(self):
        # Owner that has no delivered item
        self.logger.info('Badge 1')
        summary = self.model.getBadgeReconciliationSummary(1)
        self.assertEqual(summary[SummaryField.GROSS_SALE_AMOUNT], Decimal('0'))
        self.assertEqual(summary[SummaryField.CHARITY_DEDUCTION], Decimal('0'))
        self.assertEqual(summary[SummaryField.BOUGHT_ITEMS_AMOUNT], Decimal('350'))
        self.assertEqual(summary[SummaryField.TOTAL_DUE_AMOUNT], Decimal('350'))
        self.assertEqual(len(summary[SummaryField.AVAILABLE_UNSOLD_ITEMS]), 2)
        self.assertEqual(len(summary[SummaryField.AVAILABLE_BOUGHT_ITEMS]), 2)
        self.assertEqual(len(summary[SummaryField.PENDING_SOLD_ITEMS]), 2)
        self.assertEqual(len(summary[SummaryField.DELIVERED_SOLD_ITEMS]), 0)

        # Owner that has just delivered items
        self.logger.info('Badge 2')
        summary = self.model.getBadgeReconciliationSummary(2)
        self.assertEqual(summary[SummaryField.GROSS_SALE_AMOUNT], Decimal('447'))
        self.assertEqual(summary[SummaryField.CHARITY_DEDUCTION], Decimal('49'))
        self.assertEqual(summary[SummaryField.BOUGHT_ITEMS_AMOUNT], Decimal('0'))
        self.assertEqual(summary[SummaryField.TOTAL_DUE_AMOUNT], Decimal('-398'))
        self.assertEqual(len(summary[SummaryField.AVAILABLE_UNSOLD_ITEMS]), 0)
        self.assertEqual(len(summary[SummaryField.AVAILABLE_BOUGHT_ITEMS]), 0)
        self.assertEqual(len(summary[SummaryField.PENDING_SOLD_ITEMS]), 3)
        self.assertEqual(len(summary[SummaryField.DELIVERED_SOLD_ITEMS]), 2)

        # Owner that has delivered items and bought items
        self.logger.info('Badge 4')
        summary = self.model.getBadgeReconciliationSummary(4)
        self.assertEqual(summary[SummaryField.GROSS_SALE_AMOUNT], Decimal('235'))
        self.assertEqual(summary[SummaryField.CHARITY_DEDUCTION], Decimal('36'))
        self.assertEqual(summary[SummaryField.BOUGHT_ITEMS_AMOUNT], Decimal('57'))
        self.assertEqual(summary[SummaryField.TOTAL_DUE_AMOUNT], Decimal('-142'))
        self.assertEqual(len(summary[SummaryField.AVAILABLE_UNSOLD_ITEMS]), 0)
        self.assertEqual(len(summary[SummaryField.AVAILABLE_BOUGHT_ITEMS]), 1)
        self.assertEqual(len(summary[SummaryField.PENDING_SOLD_ITEMS]), 0)
        self.assertEqual(len(summary[SummaryField.DELIVERED_SOLD_ITEMS]), 2)

        # Owner that has items either finished, not delivered, or unsold
        self.logger.info('Badge 6')
        summary = self.model.getBadgeReconciliationSummary(6)
        self.assertEqual(summary[SummaryField.GROSS_SALE_AMOUNT], Decimal('0'))
        self.assertEqual(summary[SummaryField.CHARITY_DEDUCTION], Decimal('0'))
        self.assertEqual(summary[SummaryField.BOUGHT_ITEMS_AMOUNT], Decimal('0'))
        self.assertEqual(summary[SummaryField.TOTAL_DUE_AMOUNT], Decimal('0'))
        self.assertEqual(len(summary[SummaryField.AVAILABLE_UNSOLD_ITEMS]), 1)
        self.assertEqual(len(summary[SummaryField.AVAILABLE_BOUGHT_ITEMS]), 0)
        self.assertEqual(len(summary[SummaryField.PENDING_SOLD_ITEMS]), 0)
        self.assertEqual(len(summary[SummaryField.DELIVERED_SOLD_ITEMS]), 0)

        # Buyer that has just bought items and some of the bought items are finished
        self.logger.info('Badge 11')
        summary = self.model.getBadgeReconciliationSummary(11)
        self.assertEqual(summary[SummaryField.GROSS_SALE_AMOUNT], Decimal('0'))
        self.assertEqual(summary[SummaryField.CHARITY_DEDUCTION], Decimal('0'))
        self.assertEqual(summary[SummaryField.BOUGHT_ITEMS_AMOUNT], Decimal('429'))
        self.assertEqual(summary[SummaryField.TOTAL_DUE_AMOUNT], Decimal('429'))
        self.assertEqual(len(summary[SummaryField.AVAILABLE_UNSOLD_ITEMS]), 0)
        self.assertEqual(len(summary[SummaryField.AVAILABLE_BOUGHT_ITEMS]), 3)
        self.assertEqual(len(summary[SummaryField.PENDING_SOLD_ITEMS]), 0)
        self.assertEqual(len(summary[SummaryField.DELIVERED_SOLD_ITEMS]), 0)

        # Buyer that has items either in auction or finished
        self.logger.info('Badge 12')
        summary = self.model.getBadgeReconciliationSummary(12)
        self.assertEqual(summary[SummaryField.GROSS_SALE_AMOUNT], Decimal('0'))
        self.assertEqual(summary[SummaryField.CHARITY_DEDUCTION], Decimal('0'))
        self.assertEqual(summary[SummaryField.BOUGHT_ITEMS_AMOUNT], Decimal('0'))
        self.assertEqual(summary[SummaryField.TOTAL_DUE_AMOUNT], Decimal('0'))
        self.assertEqual(len(summary[SummaryField.AVAILABLE_UNSOLD_ITEMS]), 0)
        self.assertEqual(len(summary[SummaryField.AVAILABLE_BOUGHT_ITEMS]), 0)
        self.assertEqual(len(summary[SummaryField.PENDING_SOLD_ITEMS]), 0)
        self.assertEqual(len(summary[SummaryField.DELIVERED_SOLD_ITEMS]), 0)

    def test_reconciliateBadge(self):
        # Badge 1 contains:
        # * sold item which has not been paid for (code: A2)
        # * self-sale of an item (code: 56)
        summaryBefore = self.model.getBadgeReconciliationSummary(1)
        self.assertTrue(self.model.reconciliateBadge(1))
        summaryAfter = self.model.getBadgeReconciliationSummary(1)
        self.assertEqual(summaryAfter[SummaryField.GROSS_SALE_AMOUNT], Decimal('200'))
        self.assertEqual(summaryAfter[SummaryField.CHARITY_DEDUCTION], Decimal('20'))
        self.assertEqual(summaryAfter[SummaryField.BOUGHT_ITEMS_AMOUNT], Decimal('0'))
        self.assertEqual(summaryAfter[SummaryField.TOTAL_DUE_AMOUNT], Decimal('-180'))
        self.assertListEqual([], summaryAfter[SummaryField.AVAILABLE_UNSOLD_ITEMS])
        self.assertListEqual([], summaryAfter[SummaryField.AVAILABLE_BOUGHT_ITEMS])
        self.assertListEqual(
            ['A2'],
            [item[ItemField.CODE] for item in summaryAfter[SummaryField.PENDING_SOLD_ITEMS]])
        self.assertListEqual(
            ['56'],
            [item[ItemField.CODE] for item in summaryAfter[SummaryField.DELIVERED_SOLD_ITEMS]])
        for itemUnsoldBefore in summaryBefore[SummaryField.AVAILABLE_UNSOLD_ITEMS]:
            self.assertEqual(
                self.model.getItem(itemUnsoldBefore[ItemField.CODE])[ItemField.STATE],
                ItemState.FINISHED,
                'Item {0}'.format(itemUnsoldBefore[ItemField.CODE]))
        for itemBoughtBefore in summaryBefore[SummaryField.AVAILABLE_BOUGHT_ITEMS]:
            self.assertEqual(
                self.model.getItem(itemBoughtBefore[ItemField.CODE])[ItemField.STATE],
                ItemState.DELIVERED,
                'Item {0}'.format(itemBoughtBefore[ItemField.CODE]))
        for itemDeliveredBefore in summaryBefore[SummaryField.DELIVERED_SOLD_ITEMS]:
            self.assertEqual(
                self.model.getItem(itemDeliveredBefore[ItemField.CODE])[ItemField.STATE],
                ItemState.FINISHED,
                'Item {0}'.format(itemDeliveredBefore[ItemField.CODE]))

    def test_summaryChecksum(self):
        summaryA = self.model.getBadgeReconciliationSummary(1)
        summaryB = self.model.getBadgeReconciliationSummary(11)
        self.assertNotEqual(Summary.calculateChecksum(summaryA),
                            Summary.calculateChecksum(summaryB))

    def test_getCashDrawerSummary(self):
        summary = self.model.getCashDrawerSummary()
        self.assertIsNotNone(summary)
        self.assertEqual(summary[DrawerSummaryField.TOTAL_GROSS_CASH_DRAWER_AMOUNT], Decimal('709'))
        self.assertEqual(summary[DrawerSummaryField.TOTAL_NET_CHARITY_AMOUNT], Decimal('112'))
        self.assertEqual(summary[DrawerSummaryField.TOTAL_NET_AVAILABLE_AMOUNT], Decimal('597'))
        self.assertListEqual(
            sorted([actorSummary.Badge
                    for actorSummary in summary[DrawerSummaryField.BUYERS_TO_BE_CLEARED]]),
            [1, 3, 4, 11, 13])
        self.assertListEqual(
            sorted([actorSummary.Badge
                    for actorSummary in summary[DrawerSummaryField.OWNERS_TO_BE_CLEARED]]),
            [1, 2, 3, 4, 6, 7])
        self.assertEqual(len(summary[DrawerSummaryField.PENDING_ITEMS]), 3)

    def test_importItemsFromCsv(self):
        # 1. Import
        sessionID = 11111
        binaryStream = io.open(self.importFileCsv.getFilename(), mode='rb')
        importedItems, importedChecksum = self.model.importCSVFile(sessionID, binaryStream)
        binaryStream.close()

        # 2. Verify
        self.assertEqual(len(importedItems), 13)
        self.assertEqual(importedItems[0][ImportedItemField.IMPORT_RESULT], Result.SUCCESS)
        self.assertEqual(importedItems[1][ImportedItemField.IMPORT_RESULT], Result.DUPLICATE_ITEM)
        self.assertEqual(importedItems[2][ImportedItemField.IMPORT_RESULT], Result.SUCCESS)
        self.assertEqual(importedItems[3][ImportedItemField.IMPORT_RESULT], Result.SUCCESS)
        self.assertEqual(importedItems[4][ImportedItemField.IMPORT_RESULT], Result.INVALID_CHARITY)
        self.assertEqual(importedItems[5][ImportedItemField.IMPORT_RESULT], Result.INCOMPLETE_SALE_INFO)
        self.assertEqual(importedItems[6][ImportedItemField.IMPORT_RESULT], Result.INVALID_AMOUNT)
        self.assertEqual(importedItems[7][ImportedItemField.IMPORT_RESULT], Result.INVALID_AUTHOR)
        self.assertEqual(importedItems[8][ImportedItemField.IMPORT_RESULT], Result.INVALID_TITLE)
        self.assertEqual(importedItems[9][ImportedItemField.IMPORT_RESULT], Result.DUPLICATE_ITEM)
        self.assertEqual(importedItems[10][ImportedItemField.IMPORT_RESULT], Result.SUCCESS)
        self.assertEqual(importedItems[11][ImportedItemField.IMPORT_RESULT], Result.SUCCESS)
        self.assertEqual(importedItems[12][ImportedItemField.IMPORT_RESULT], Result.SUCCESS)

        # 3. Apply
        defaultOwner = 2
        result, skippedItems, renumberedItems = self.model.applyImport(
            sessionID, importedChecksum, defaultOwner)
        self.assertEqual(result, Result.SUCCESS)
        self.assertEqual(len(self.model.getAdded(sessionID)), 6)
        self.assertEqual(len(self.dataset.getItems(
            'Owner=="{0}" and Title=="Smooth \\\"Frog\\\"" and Author=="Greentiger" and State=="{1}" and InitialAmount=="120" and Charity=="47"'.format(
                defaultOwner, ItemState.ON_SALE))), 1)
        self.assertEqual(len(self.dataset.getItems(
            'Owner=="{0}" and Title=="Draft Horse" and Author=="Greentiger" and State=="{1}" and InitialAmount=="500" and Charity=="0"'.format(
                defaultOwner, ItemState.ON_SALE))), 1)
        self.assertEqual(len(self.dataset.getItems(
            'Owner=="{0}" and Title=="Žluťoučký kůň" and Author=="Greentiger" and State=="{1}"'.format(
                defaultOwner, ItemState.ON_SHOW))), 1)
        self.assertEqual(len(self.dataset.getItems(
            'Owner=="{0}" and Title=="Eastern Dragon" and Author=="Redwolf" and State=="{1}"'.format(
                defaultOwner, ItemState.SOLD))), 1)
        self.assertEqual(len(self.dataset.getItems(
            'Owner=="7" and Title=="More Wolves" and Author=="Greenfox" and State=="{0}" and InitialAmount=="280" and Charity=="50"'.format(
                ItemState.ON_SALE))), 1)

        # 4. Re-apply
        result, skippedItems, renumberedItems = self.model.applyImport(
            sessionID, importedChecksum, defaultOwner)
        self.assertEqual(result, Result.NO_IMPORT)

        # 5. Re-apply with invalid checksum
        binaryStream = io.open(self.importFileCsv.getFilename(), mode='rb')
        importedItems, importedChecksum = self.model.importCSVFile(sessionID, binaryStream)
        binaryStream.close()
        result, skippedItems, renumberedItems = self.model.applyImport(
            sessionID, importedChecksum + 50, defaultOwner)
        self.assertEqual(result, Result.INVALID_CHECKSUM)

    def test_importItemsFromCsv_ImportNumberReuse(self):
        # Verify next code. This is crucial for the last test.
        NEXT_AVAILABLE_CODE = 57

        # 1. Import
        importFile = self.restoreTestFile('test.model.import_number.csv')
        sessionID = 11111
        binaryStream = io.open(importFile.getFilename(), mode='rb')
        importedItems, importedChecksum = self.model.importCSVFile(sessionID, binaryStream)
        binaryStream.close()

        # 2. Verify
        self.assertEqual(len(importedItems), 11)
        self.assertEqual(importedItems[0][ImportedItemField.IMPORT_RESULT], Result.SUCCESS)
        self.assertEqual(importedItems[1][ImportedItemField.IMPORT_RESULT], Result.DUPLICATE_ITEM)
        self.assertEqual(importedItems[2][ImportedItemField.IMPORT_RESULT], Result.DUPLICATE_ITEM)
        self.assertEqual(importedItems[3][ImportedItemField.IMPORT_RESULT], Result.DUPLICATE_ITEM)
        self.assertEqual(importedItems[4][ImportedItemField.IMPORT_RESULT], Result.SUCCESS)
        self.assertEqual(importedItems[5][ImportedItemField.IMPORT_RESULT], Result.SUCCESS)
        self.assertEqual(importedItems[6][ImportedItemField.IMPORT_RESULT], Result.SUCCESS)
        self.assertEqual(importedItems[7][ImportedItemField.IMPORT_RESULT], Result.SUCCESS)
        self.assertEqual(importedItems[8][ImportedItemField.IMPORT_RESULT], Result.SUCCESS)
        self.assertEqual(importedItems[9][ImportedItemField.IMPORT_RESULT], Result.SUCCESS)
        self.assertEqual(importedItems[10][ImportedItemField.IMPORT_RESULT], Result.SUCCESS)

        # 3. Apply
        defaultOwner = 2
        result, skippedItems, renumberedItems = self.model.applyImport(
            sessionID, importedChecksum, defaultOwner)
        self.assertEqual(Result.SUCCESS, result)
        self.assertEqual(7, len(self.model.getAdded(sessionID)))

        # 3a. Check that if Import Number is missing, it will be left empty.
        self.assertDictContainsSubset(
            {ItemField.IMPORT_NUMBER: None},
            self.dataset.getItems('Owner=="7" and Author=="Redpanda" and Title=="Moon cycles"')[0])

        # 3b. Check that a duplicate Import Number in the import was not imported.
        self.assertEqual(
            0, len(self.dataset.getItems('Owner=="7" and Author=="Redpanda" and Title=="Fullmoon"')))
        self.assertEqual(
            0, len(self.dataset.getItems('Owner=="7" and Author=="Redpanda" and Title=="No moon"')))

        # 3c. Check that an existing item with a matching Import Number has been updated
        # in case there were changes.
        self.assertEqual(
            1, len(self.dataset.getItems('Owner=="7" and Author=="Redpanda" and Title=="Half moon"')))
        updatedItem = self.dataset.getItems('Owner=="7" and Author=="Redpanda" and Title=="Half moon"')[0]
        self.assertIn(updatedItem[ItemField.CODE], self.model.getAdded(sessionID))

        # 3d. Check that an existing item with a matching Import Number has not been updated.
        nonupdatedItem = self.dataset.getItems('Owner=="7" and Author=="Greenfox" and Title=="White Snow"')[0]
        self.assertNotIn(nonupdatedItem[ItemField.CODE], self.model.getAdded(sessionID))

        # 3e. Check that an item whose import number might have been used earlier is renumbered.
        renumberedItem = self.dataset.getItems('Owner=="7" and Author=="Redpanda" and Title=="Day phases"')[0]
        self.assertNotEqual('45', renumberedItem[ItemField.CODE])
        self.assertEqual(45, renumberedItem[ItemField.IMPORT_NUMBER])

        # 3f. Check that Import Number is used as Code if the Code has not been used previously.
        self.assertDictContainsSubset(
            {ItemField.CODE: '80', ItemField.IMPORT_NUMBER: 80},
            self.dataset.getItems('Owner=="7" and Author=="Redpanda" and Title=="Morning"')[0])
        self.assertDictContainsSubset(
            {ItemField.CODE: '90', ItemField.IMPORT_NUMBER: 90},
            self.dataset.getItems('Owner=="7" and Author=="Redpanda" and Title=="Afternoon"')[0])

        # 3g. Check that order of occurrence in the import has no impact on the ability
        # to use Import Number as Code.
        self.assertDictContainsSubset(
            {ItemField.CODE: '85', ItemField.IMPORT_NUMBER: 85},
            self.dataset.getItems('Owner=="7" and Author=="Redpanda" and Title=="Noon"')[0])

        # 3h. Check that if Import Number might be used as Code at the start of the import,
        # it will be used as Code. No unnumbered or re-numbered item will prevent that.
        self.assertDictContainsSubset(
            {ItemField.CODE: str(NEXT_AVAILABLE_CODE),
             ItemField.IMPORT_NUMBER: NEXT_AVAILABLE_CODE},
            self.dataset.getItems('Owner=="7" and Author=="Redpanda" and Title=="Day"')[0])

    def test_importItemsFromText(self):
        textStream = io.open(self.importFileTxt.getFilename(), mode='rt', encoding='utf-8')
        text = '\n'.join(textStream.readlines())
        textStream.close()

        # 1. Import
        sessionID = 11111
        importedItems, importedChecksum = self.model.importText(sessionID, text)

        # 2. Verify
        self.assertEqual(len(importedItems), 10)
        self.assertEqual(importedItems[0][ImportedItemField.IMPORT_RESULT], Result.SUCCESS)
        self.assertEqual(importedItems[1][ImportedItemField.IMPORT_RESULT], Result.SUCCESS)
        self.assertEqual(importedItems[2][ImportedItemField.IMPORT_RESULT], Result.INVALID_CHARITY)
        self.assertEqual(importedItems[3][ImportedItemField.IMPORT_RESULT], Result.INCOMPLETE_SALE_INFO)
        self.assertEqual(importedItems[4][ImportedItemField.IMPORT_RESULT], Result.INVALID_AMOUNT)
        self.assertEqual(importedItems[5][ImportedItemField.IMPORT_RESULT], Result.INVALID_AUTHOR)
        self.assertEqual(importedItems[6][ImportedItemField.IMPORT_RESULT], Result.INVALID_TITLE)
        self.assertEqual(importedItems[7][ImportedItemField.IMPORT_RESULT], Result.DUPLICATE_ITEM)
        self.assertEqual(importedItems[8][ImportedItemField.IMPORT_RESULT], Result.DUPLICATE_ITEM)
        self.assertEqual(importedItems[9][ImportedItemField.IMPORT_RESULT], Result.DUPLICATE_ITEM)

        # 3. Apply
        owner = 2
        result, skippedItems, renumberedItems = self.model.applyImport(
            sessionID, importedChecksum, owner)
        self.assertEqual(result, Result.SUCCESS)
        self.assertEqual(len(self.model.getAdded(sessionID)), 2)
        self.assertEqual(len(self.dataset.getItems(
            'Owner=="{0}" and Title=="Smooth Frog" and Author=="Greentiger" and State=="{1}" and InitialAmount=="120" and Charity=="47"'.format(
                owner, ItemState.ON_SALE))), 1)
        self.assertEqual(len(self.dataset.getItems(
            'Owner=="{0}" and Title=="Žluťoučký kůň" and Author=="Greentiger" and State=="{1}"'.format(
                owner, ItemState.ON_SHOW))), 1)
        self.assertEqual(len(self.dataset.getItems(
            'Owner=="{0}" and Title=="Eastern Dragon" and Author=="Redwolf" and State=="{1}"'.format(
                owner, ItemState.SOLD))), 1)

    def test_getNetAmount(self):
        # Regular amount
        grossAmount = 253
        saleAmount, charityAmount = self.model.getNetAmount(Decimal(grossAmount), 47)
        self.assertEqual((saleAmount, charityAmount), (134, 119))
        self.assertEqual(saleAmount + charityAmount, grossAmount)

        # Excessive amount
        self.assertEqual(self.model.getNetAmount(Decimal('1E+34'), 14), (0, 0))

        # Invalid amount
        self.assertEqual(self.model.getNetAmount(None, 23), (0, 0))

    def test_getSendItemToAuction(self):
        # Item of acceptable state (AUCT)
        item = self.model.sendItemToAuction('A10')
        self.assertIsNotNone(item)
        self.assertDictContainsSubset(
            {ItemField.CODE: 'A10',
             ItemField.AMOUNT_IN_AUCTION: item[ItemField.AMOUNT]},
            self.model.getItemInAuction())
        self.model.clearAuction()
        self.assertIsNone(self.model.getItemInAuction())

        # Item of invalid state (SOLD)
        self.assertIsNone(self.model.sendItemToAuction('A13'))
        self.assertIsNone(self.model.getItemInAuction())

    def test_closeItemAsNotSold(self):
        # Close item
        self.assertEqual(Result.SUCCESS, self.model.closeItemAsNotSold('55'))
        item = self.model.getItem('55')
        self.assertDictContainsSubset(
            {ItemField.STATE: ItemState.NOT_SOLD,
             ItemField.BUYER: None,
             ItemField.AMOUNT: None},
            item)

        # Close item which is not closable
        self.assertEqual(Result.ITEM_NOT_CLOSABLE, self.model.closeItemAsNotSold('A13'))

    def test_closeItemAsSold(self):
        # Close item
        self.assertEqual(Result.SUCCESS, self.model.closeItemAsSold('55', Decimal(1000), 9999))
        item = self.dataset.getItems('Buyer=="{0}"'.format(9999))[0]
        self.assertDictContainsSubset(
            {ItemField.STATE: ItemState.SOLD,
             ItemField.BUYER: 9999,
             ItemField.AMOUNT: Decimal(1000)},
            item)

        # Close item which is not closable
        self.assertEqual(Result.ITEM_NOT_CLOSABLE,
                         self.model.closeItemAsSold('A13', Decimal(1000), 9999))

    def test_closeItemIntoAuction(self):
        # Close item
        self.assertEqual(Result.SUCCESS,
                         self.model.closeItemIntoAuction('55', Decimal(1000), 9999, None))
        item = self.dataset.getItems('Buyer=="{0}"'.format(9999))[0]
        self.assertDictContainsSubset(
            {ItemField.STATE: ItemState.IN_AUCTION,
             ItemField.BUYER: 9999,
             ItemField.AMOUNT: Decimal(1000)},
            item)

        # Close item which is not closable
        self.assertEqual(Result.ITEM_NOT_CLOSABLE,
                         self.model.closeItemIntoAuction('A13', Decimal(1000), 9999, None))

    def test_getAllItemsInAuction(self):
        auctionItems = self.model.getAllItemsInAuction()
        self.assertListEqual(
            ['A9', 'A10'],
            [item[ItemField.CODE] for item in auctionItems])

    def test_getAllItemsInAuction_Ordering(self):
        datasetAuction = Dataset(
            self.logger, './',
            self.sessionFile.getFilename(),
            self.itemFileAuctionOnly.getFilename(),
            self.currencyFile.getFilename())
        datasetAuction.restore()
        modelAuction = Model(
            self.logger, datasetAuction, self.currency)

        auctionItems = modelAuction.getAllItemsInAuction()
        auctionItems.sort(key=lambda item: item[ItemField.AUCTION_SORT_CODE])
        for item in auctionItems:
            print('{0} - {1}'.format(item[ItemField.AUTHOR], item[ItemField.AMOUNT]))

        # Check that there is no author block larger than two
        largestBlockSize = 0
        largestBlockAuthor = None
        blockAuthor = None
        blockSize = 0
        for item in auctionItems:
            if blockAuthor is not None and item[ItemField.AUTHOR] == blockAuthor:
                blockSize = blockSize + 1
            else:
                if blockSize > largestBlockSize:
                    largestBlockSize = blockSize
                    largestBlockAuthor = blockAuthor
                blockAuthor = item[ItemField.AUTHOR]
                blockSize = 1
        # Account for the final block, which the loop above never compares.
        if blockSize > largestBlockSize:
            largestBlockSize = blockSize
            largestBlockAuthor = blockAuthor
        self.assertLessEqual(largestBlockSize, 2, 'Author: ' + str(largestBlockAuthor))

    def test_generateDeviceCode(self):
        adminSessionID = self.model.startNewSession(UserGroups.ADMIN, '127.0.0.1')
        sessionID = self.model.startNewSession(UserGroups.UNKNOWN, '192.168.0.1')

        # If multiple numbers are generated per session, only the last one is valid
        deviceCode1 = self.model.generateDeviceCode(sessionID)
        self.assertIsNotNone(deviceCode1)
        deviceCode2 = self.model.generateDeviceCode(sessionID)
        self.assertIsNotNone(deviceCode2)
        self.assertEqual(Result.DISABLED_DEVICE_CODE,
                         self.model.approveDeviceCode(adminSessionID, deviceCode1, UserGroups.SCAN_DEVICE))
        self.assertEqual(Result.SUCCESS,
                         self.model.approveDeviceCode(adminSessionID, deviceCode2, UserGroups.SCAN_DEVICE))

    def test_getSessionUserGroup(self):
        adminSessionID = self.model.startNewSession(UserGroups.ADMIN, '127.0.0.1')
        sessionID = self.model.startNewSession(UserGroups.UNKNOWN, '192.168.0.1')
        self.assertIsNotNone(sessionID)

        # User group is defined in session
        self.assertEqual(UserGroups.ADMIN, self.model.getSessionUserGroup(adminSessionID))
        self.assertEqual(UserGroups.UNKNOWN, self.model.getSessionUserGroup(sessionID))

        # If a device code is approved, the associated user group is used.
        deviceCode = self.model.generateDeviceCode(sessionID)
        self.assertEqual(Result.SUCCESS,
                         self.model.approveDeviceCode(adminSessionID, deviceCode, UserGroups.SCAN_DEVICE))
        self.assertEqual(UserGroups.SCAN_DEVICE, self.model.getSessionUserGroup(sessionID))

        # If a device code is dropped, the user group is UNKNOWN.
        self.model.dropDeviceCode(adminSessionID, deviceCode)
        self.assertEqual(UserGroups.UNKNOWN, self.model.getSessionUserGroup(sessionID))
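
# Standard unittest entry point, assuming this test module is run directly
# (python test_model.py); test runners that discover tests do not need it:
if __name__ == '__main__':
    unittest.main()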