def __getitem__(self, index):
    img_path = self.base_dir / self.img_paths[index]
    bbox = change_bbox(self.bbox[index], 1.4, use_forehead=False)
    img = np.array(Image.open(img_path).crop(bbox))
    label = self.labels[index].copy()
    if self.use_bined:
        bined_label = self.euler_binned[index].copy()
    # ImageAugment (RandomBrightness, AddNoise...)
    if self.image_augmenter:
        augmented = self.image_augmenter(image=img)
        img = augmented['image']
    # Resize (Scale & Pad & Crop)
    if self.resizer:
        resized = self.resizer(image=img)
        img = resized['image']
    # AffineAugment (Horizontal Flip, Rotate...)
    if self.affine_augmenter:
        augmented = self.affine_augmenter(image=img)
        img = augmented['image']
    if self.split == 'train':
        # Horizontal flip: negate yaw and roll, and mirror the corresponding bins
        if random.random() < 0.5:
            img = cv2.flip(img, 1)
            label[0] = -label[0]
            label[2] = -label[2]
            if self.use_bined:
                bined_label[0] = -(bined_label[0] - 3) + 3
                bined_label[2] = -(bined_label[2] - 9) + 10
        # Random 90-degree rotation for near-frontal poses: shift roll by +/-90
        if random.random() < 0.5 and abs(label[0]) < 30 and abs(label[2]) < 30:
            if random.random() < 0.5:
                factor = 1
                label[2] += 90
                if self.use_bined:
                    bined_label[2] = min(bined_label[2] + 10, 20)
            else:
                factor = 3
                label[2] -= 90
                if self.use_bined:
                    bined_label[2] = max(bined_label[2] - 10, 0)
            img = np.ascontiguousarray(np.rot90(img, factor))
    if self.n_class == 4:
        label = euler2quat(*label)
    if self.debug:
        print(self.img_paths[index], label)
    else:
        img = preprocess(img)
        img = img.transpose(2, 0, 1)
        img = torch.FloatTensor(img)
        label = torch.FloatTensor(label)
    if self.use_bined:
        return img, label, bined_label[0], bined_label[1], bined_label[2]
    else:
        return img, label
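# A quick sanity check (not from the source) of the bin mirroring used in the
# horizontal flip above, assuming 7 yaw bins centred on bin 3 and roll bins
# 0..19 centred between bins 9 and 10:
for b in range(7):
    assert 0 <= -(b - 3) + 3 <= 6       # yaw bin reflected about bin 3
for b in range(20):
    assert 0 <= -(b - 9) + 10 <= 19     # roll bin reflected about 9.5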
def input_queue_generator(input_reader_config):
    """Builds a batched input queue from TFRecord files.

    input_reader_config: a dict-like object configuring the input reader:
        input_path: TFRecord file path
        num_epochs: number of epochs over the input images; default None (loop indefinitely)
        num_readers: integer, number of Readers to create
        shuffle: boolean, use a RandomShuffleQueue to shuffle files and records
        queue_capacity: integer, capacity of the queue
    """
    _, string_tensor = parallel_reader.parallel_read(
        input_reader_config.input_path,
        reader_class=tf.TFRecordReader,
        num_epochs=(input_reader_config.num_epochs
                    if input_reader_config.num_epochs else None),
        num_readers=input_reader_config.num_readers,
        shuffle=input_reader_config.shuffle,
        dtypes=[tf.string, tf.string],
        capacity=input_reader_config.queue_capacity)
    tensor_dict = TfExampleDecoder().decode(string_tensor)
    # Input image (converted to float32)
    tensor_dict['image'] = tf.to_float(tf.expand_dims(tensor_dict['image'], 0))
    if input_reader_config.data_augmentation_ops:
        tensor_dict = preprocess(tensor_dict,
                                 input_reader_config.data_augmentation_ops)
    input_queue = BatchQueue(
        tensor_dict,
        batch_size=input_reader_config.batch_size,
        batch_queue_capacity=input_reader_config.batch_queue_capacity,
        num_batch_queue_threads=input_reader_config.num_batch_queue_threads,
        prefetch_queue_capacity=input_reader_config.prefetch_queue_capacity)
    return input_queue
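# A minimal sketch (assumed values) of the reader configuration consumed above;
# the field names follow the attribute accesses in input_queue_generator.
from types import SimpleNamespace

reader_config = SimpleNamespace(
    input_path='data/train.record',   # illustrative path
    num_epochs=None,                  # None => loop over the records indefinitely
    num_readers=4,
    shuffle=True,
    queue_capacity=2000,
    data_augmentation_ops=None,       # skip the preprocess() step in this sketch
    batch_size=24,
    batch_queue_capacity=150,
    num_batch_queue_threads=8,
    prefetch_queue_capacity=10,
)
# input_queue = input_queue_generator(reader_config)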
def __getitem__(self, index):
    img_path = self.base_dir / self.img_paths[index]
    bbox = change_bbox(self.bboxs[index], 1.4, use_forehead=False)
    img = np.array(Image.open(img_path).crop(bbox))
    label = self.labels[index].copy()
    # ImageAugment (RandomBrightness, AddNoise...)
    if self.image_augmenter:
        augmented = self.image_augmenter(image=img)
        img = augmented['image']
    # Resize (Scale & Pad & Crop)
    if self.resizer:
        resized = self.resizer(image=img)
        img = resized['image']
    # AffineAugment (Horizontal Flip, Rotate...)
    if self.affine_augmenter:
        augmented = self.affine_augmenter(image=img)
        img = augmented['image']
    if self.debug:
        print(self.bboxs[index])
        print(label)
    else:
        img = preprocess(img)
        img = img.transpose(2, 0, 1)
        img = torch.FloatTensor(img)
        label = torch.FloatTensor(label)
    return img, label, img, label, label
def get_one_img(self, index):
    image = self.db["images"][index]
    label = self.db["labels"][index]
    # ImageAugment (RandomBrightness, AddNoise...)
    if self.image_augmenter:
        augmented = self.image_augmenter(image=image)
        image = augmented['image']
    # Resize (Scale & Pad & Crop)
    # if self.resizer:
    #     resized = self.resizer(image=image)
    #     image = resized['image']
    # AffineAugment (Horizontal Flip, Rotate...)
    if self.affine_augmenter:
        augmented = self.affine_augmenter(image=image)
        image = augmented['image']
    if self.debug:
        return image
    else:
        image = preprocess(image)
        image = torch.FloatTensor(image).permute(2, 0, 1)
        label = torch.FloatTensor(label)
        return image, label, image, label, label
def preprocess(self, im):
    preprocess_ops = []
    for op_info in self.config.preprocess_infos:
        new_op_info = op_info.copy()
        op_type = new_op_info.pop('type')
        if op_type == 'Resize':
            new_op_info['arch'] = self.config.arch
        preprocess_ops.append(eval(op_type)(**new_op_info))
    im, im_info = preprocess(im, preprocess_ops)
    inputs = create_inputs(im, im_info, self.config.arch)
    return inputs, im_info
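# Illustrative shape (an assumption, not taken from the source) of
# config.preprocess_infos consumed above: each entry names an op class via
# 'type' and passes the remaining keys to that class's constructor.
preprocess_infos = [
    {'type': 'Resize', 'target_size': 608, 'interp': 1},
    {'type': 'Normalize', 'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225]},
]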
def _create_losses(input_queue, num_classes, train_config):
    """Creates the loss function for a DetectionModel.

    Args:
        input_queue: BatchQueue object holding enqueued tensor_dicts.
        num_classes: number of classes, integer.

    Returns:
        Average sum of classification and location losses over the batch.
    """
    (images, groundtruth_boxes_list, groundtruth_classes_list,
     anchors_list) = _get_inputs(input_queue, num_classes,
                                 batch_size=train_config.batch_size)
    images = [
        preprocess(image,
                   im_height=train_config.im_height,
                   im_width=train_config.im_width,
                   preprocess_options=train_config.data_augmentation_ops)
        for image in images
    ]
    images = tf.concat(images, 0)
    net = RetinaNet()
    loc_preds, cls_preds = net(images, num_classes + 1, anchors=9)

    # Ground-truth labels and encoded boxes per anchor
    cls_gt = [anchor.get_field("gt_labels") for anchor in anchors_list]
    loc_gt = [anchor.get_field("gt_encoded_boxes") for anchor in anchors_list]
    # Positive-anchor count for each image
    gt_anchor_nums = tf.map_fn(
        lambda x: tf.reduce_sum(tf.cast(tf.greater(x, 0), tf.int32)), cls_gt)

    # Valid anchor indices: skip ignored anchors (IoU between 0.4 and 0.5)
    valid_anchor_indices = tf.squeeze(tf.where(tf.greater_equal(cls_gt, 0)))
    [valid_cls_preds, valid_cls_gt] = map(
        lambda x: tf.gather(x, valid_anchor_indices, axis=1),
        [cls_preds, cls_gt])
    # Classification loss, normalized by the positive-anchor count
    cls_loss = tf.multiply(focal_loss(valid_cls_gt, valid_cls_preds),
                           1. / tf.to_float(gt_anchor_nums))

    # Location regression loss: skip negative and ignored anchors
    valid_cls_indices = tf.squeeze(tf.where(tf.greater(cls_gt, 0)))
    [valid_loc_preds, valid_loc_gt] = map(
        lambda x: tf.gather(x, valid_cls_indices, axis=1),
        [loc_preds, loc_gt])
    loc_loss = regression_loss(
        valid_loc_preds, valid_loc_gt,
        weights=tf.expand_dims(1. / tf.to_float(gt_anchor_nums), 1))

    loss = (tf.reduce_sum(loc_loss) + tf.reduce_sum(cls_loss)) / tf.size(
        gt_anchor_nums, out_type=tf.float32)
    return loss
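# The focal_loss used above is not shown in the source; a minimal per-anchor
# sigmoid variant following Lin et al. (2017), FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t),
# is sketched here as an assumed stand-in, not the author's implementation.
def sigmoid_focal_loss(labels, logits, alpha=0.25, gamma=2.0):
    labels = tf.cast(labels, logits.dtype)
    ce = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)
    p = tf.sigmoid(logits)
    p_t = labels * p + (1. - labels) * (1. - p)              # prob. of the true class
    alpha_t = labels * alpha + (1. - labels) * (1. - alpha)  # class-balance weight
    return alpha_t * tf.pow(1. - p_t, gamma) * ce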
def main():
    yolo = YOLONet()
    pascal = preprocess()
    solver = Solver(yolo, pascal)
    print('Start training ...')
    solver.train()
    print('Done training.')
def visualize_HDF5(model, HDF5_path, save_imgs_path=None, save_video_path=None,
                   target_size_imgs=64):
    data = h5py.File(HDF5_path, 'r')
    print("[INFO] Drawing ...")
    for index in tqdm.tqdm(range(0, 2000)):
        img = np.copy(data["images"][index][:, :, ::-1])
        height, width, channels = img.shape
        poses = []
        labeled_bboxs = []
        cropped_frame = preprocess(np.array(img))
        # Convert to tensor and transform to [-1, 1]
        cropped_frame = torch.FloatTensor(cropped_frame).permute(2, 0, 1).unsqueeze_(0)
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        cropped_frame = cropped_frame.to(device)  # .to() is not in-place for tensors
        # Predict
        preds = model(cropped_frame)
        preds = preds.cpu().detach().numpy()
        poses.append(preds[0])
        labeled_bboxs.append([0, 0, 63, 63])
        poses = np.array(poses)
        labeled_bboxs = np.array(labeled_bboxs)
        drawed_img = draw_annotates(img, labeled_bboxs, poses)
        img_name = f"{index}.jpg"
        if save_imgs_path:
            cv2.imwrite(os.path.join(save_imgs_path, img_name), drawed_img)
    if save_video_path:
        generate_video(save_imgs_path, save_video_path)
def main():
    torch.backends.cudnn.benchmark = True
    args = parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    print("Configuration: ", args)
    dataloaders = preprocess(args.model_name, args.data_dir, 4,
                             args.batch_size, 0.2)
    # inputs, classes = next(iter(dataloaders['val']))
    # out = torchvision.utils.make_grid(inputs)
    # imsave(inputs, 'example_image.png')
    if args.model_path is not None:
        print("Load model from '{}'".format(args.model_path))
        model = torch.load(args.model_path)
    else:
        model, *_ = model_selection(args.model_name,
                                    len(dataloaders['train'].dataset.classes))
    model = model.cuda()
    hw_size = 224 if 'resnet' in args.model_name else 299
    print(summary(model, (3, hw_size, hw_size)))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    if args.phase == 'train':
        model = train_model(model=model,
                            model_name=args.model_name,
                            dataloaders=dataloaders,
                            criterion=criterion,
                            optimizer=optimizer,
                            scheduler=exp_lr_scheduler,
                            batch_size=args.batch_size,
                            save_dir=args.save_dir,
                            num_epochs=args.num_epochs)
        torch.save(model,
                   os.path.join(args.save_dir,
                                'gan-detection-' + args.model_name + '.h5'))
    else:
        test_model(model, dataloaders)
def __getitem__(self, item):
    with tf.device("/cpu:0"):
        groundtruth_valids = np.zeros([self.batch_size], np.int32)
        # Pick a random input resolution for multi-scale training
        random_img_size = np.random.choice(self.multi_scale)
        self.max_side = self.min_side = random_img_size
        self.gluoncv_aug = aug_gluoncv.YOLO3DefaultTrainTransform(self.max_side,
                                                                  self.max_side)
        batch_img = np.zeros([self.batch_size, self.max_side, self.max_side, 3])
        batch_boxes = np.empty([self.batch_size, self._args.max_box_num_per_image, 5])
        batch_boxes_list = []
        for batch_index, file_index in enumerate(
                self.data_index[item * self.batch_size:(item + 1) * self.batch_size]):
            # Get image from file
            img_path = self.img_path_list[file_index]
            img = self.read_img(img_path)
            img, scale, pad = self.resize_fun(img, (self.max_side, self.min_side))
            batch_img[batch_index, 0:img.shape[0], 0:img.shape[1], :] = img
            boxes = self.boxes_and_labels[file_index]
            boxes = copy.deepcopy(boxes)
            boxes[:, 0:4] *= scale
            half_pad = pad // 2
            boxes[:, 0:4] += np.tile(half_pad, 2)
            batch_boxes_list.append(boxes)
            groundtruth_valids[batch_index] = boxes.shape[0]
            boxes = np.pad(boxes,
                           [(0, self._args.max_box_num_per_image - boxes.shape[0]), (0, 0)],
                           mode='constant')
            batch_boxes[batch_index] = boxes
        tail_batch_size = len(batch_boxes_list)
        # Augment
        if self.augment == 'mosaic':
            new_batch_size = self.batch_size // 4
            for bi in range(new_batch_size):
                four_img, four_boxes, one_img, one_boxes = data_augment.load_mosaic(
                    batch_img[bi * 4:(bi + 1) * 4],
                    batch_boxes_list[bi * 4:(bi + 1) * 4])
                data_augment.random_hsv(one_img)
                data_augment.random_left_right_flip(one_img, one_boxes)
                groundtruth_valids[bi] = one_boxes.shape[0]
                one_boxes = np.pad(
                    one_boxes,
                    [(0, self._args.max_box_num_per_image - one_boxes.shape[0]), (0, 0)],
                    mode='constant')
                batch_img[bi] = one_img
                batch_boxes[bi] = one_boxes
            batch_img = batch_img[0:new_batch_size]
            batch_boxes = batch_boxes[0:new_batch_size]
        elif self.augment == 'only_flip_left_right':
            for bi in range(self.batch_size):
                data_augment.random_left_right_flip(batch_img[bi], batch_boxes[bi])
        elif self.augment == 'ssd_random_crop':
            batch_img = batch_img.astype(np.uint8)
            for di in range(self.batch_size):
                batch_img[di], batch_boxes_list[di] = self.gluoncv_aug(
                    batch_img[di], batch_boxes_list[di])
                batch_boxes[di] = np.pad(
                    batch_boxes_list[di],
                    [(0, self._args.max_box_num_per_image - batch_boxes_list[di].shape[0]), (0, 0)])
                groundtruth_valids[di] = batch_boxes_list[di].shape[0]
        batch_img = batch_img[0:tail_batch_size]
        batch_boxes = batch_boxes[0:tail_batch_size]
        groundtruth_valids = groundtruth_valids[0:tail_batch_size]

        batch_img, batch_boxes = preprocess(batch_img, batch_boxes)
        if int(self._args.num_classes) == 1:
            y_true = get_y_true_with_one_class(self.max_side, batch_boxes,
                                               groundtruth_valids, self._args)
        else:
            y_true = get_y_true(self.max_side, batch_boxes,
                                groundtruth_valids, self._args)
        if self.mode == 2:
            return batch_img, batch_boxes, groundtruth_valids
        return batch_img, y_true
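# resize_fun is not shown in the source; since the boxes above are scaled and
# then shifted by half the pad, it presumably letterboxes the image (resize
# keeping aspect ratio, then centre-pad to a square). A minimal assumed
# equivalent:
import cv2
import numpy as np

def letterbox(img, target_side):
    h, w = img.shape[:2]
    scale = target_side / max(h, w)
    resized = cv2.resize(img, (int(round(w * scale)), int(round(h * scale))))
    pad = np.array([target_side - resized.shape[1], target_side - resized.shape[0]])
    half = pad // 2
    out = np.zeros((target_side, target_side, 3), dtype=img.dtype)
    out[half[1]:half[1] + resized.shape[0], half[0]:half[0] + resized.shape[1]] = resized
    return out, scale, pad  # boxes then transform as xyxy * scale + tile(pad // 2, 2)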
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from hyperopt import hp, fmin, tpe, space_eval, Trials, STATUS_OK
from utils.preprocess import preprocess, preprocess_lm
from utils.helpers import clean_params
import pandas as pd
from tqdm import tqdm
import yaml
import h2o

# Preprocess data
seed = 1
test_size = 0.2
train, valid, y_var, X_vars, test, _ = preprocess(test_set=True,
                                                  test_size=test_size,
                                                  pca=False,
                                                  random_state=seed)
train_p, valid_p, y_var, X_vars_p, test_p, _p = preprocess(test_set=True,
                                                           test_size=test_size,
                                                           random_state=seed)
train_lm, valid_lm, test_lm, _lm, X_vars_lm, y_var = preprocess_lm(
    test_size=test_size, random_state=seed)

h2o.init()

# Load the models
def load(file):
    return yaml.load(open(file, 'r'), Loader=yaml.FullLoader)

rfc_final = RandomForestClassifier(**load('model_params/rfc_01.yaml'))
def visualize_fusion(uni_model, var_model, wei_model, dir_imgs, label_box_imgs,
                     save_imgs_path=None, save_video_path=None,
                     save_cropped_path=None, target_size_imgs=64):
    """
    Input:
        uni_model, var_model, wei_model: the three pose models to fuse
        dir_imgs: path to the images directory
        label_box_imgs: path to the bbox labels directory
        save_cropped_path: path to the cropped-images output directory
    Output:
        Draws bboxes and poses on the images and exports them to images
        and, optionally, a video.
    """
    list_imgs = read_frames(dir_imgs)
    resizer = albu.Compose([albu.SmallestMaxSize(target_size_imgs, p=1.),
                            albu.CenterCrop(target_size_imgs, target_size_imgs, p=1.)])
    print("[INFO] Drawing ...")
    for index, img_path in enumerate(tqdm.tqdm(list_imgs[:])):
        img = cv2.imread(img_path)
        height, width, channels = img.shape
        img_name = os.path.basename(img_path)
        if img_name.split(".")[-1] == "jpg":
            label_name = img_name.replace(".jpg", ".txt")
        else:
            label_name = img_name.replace(".png", ".txt")
        label_path = os.path.join(label_box_imgs, label_name)
        bboxs = get_bbox_YOLO_format(label_path, width, height)
        poses = []
        labeled_bboxs = []
        # Inner loop variable renamed from `index` to avoid shadowing the outer one
        for box_index in range(0, len(bboxs)):
            bbox = bboxs[box_index]
            bbox = np.array([bbox[0], bbox[1], bbox[2], bbox[3]])
            x, y = bbox[:2]
            w, h = (bbox[2:] - bbox[:2])
            x, y, w, h = int(x), int(y), int(w), int(h)
            cropped_frame = img[y:y + h, x:x + w]
            # TODO: resize frame. If size < 64 then scale up, else scale down,
            # keeping the aspect ratio in both cases.
            if w < target_size_imgs or h < target_size_imgs:
                scale_percent = target_size_imgs / min(w, h)
                width = int(img.shape[1] * scale_percent)
                height = int(img.shape[0] * scale_percent)
            # Scale down and crop to target size
            if cropped_frame.shape[0] == 0 or cropped_frame.shape[1] == 0:
                continue
            cropped_frame = resizer(image=cropped_frame)
            cropped_frame_copy = np.copy(np.array(cropped_frame['image']))
            cropped_frame = preprocess(np.array(cropped_frame['image']))
            # Convert to tensor and transform to [-1, 1]
            cropped_frame = torch.FloatTensor(cropped_frame).permute(2, 0, 1).unsqueeze_(0)
            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
            cropped_frame = cropped_frame.to(device)  # .to() is not in-place
            # Predict with each model and average the three predictions
            uni_preds = uni_model(cropped_frame)
            var_preds = var_model(cropped_frame)
            wei_preds = wei_model(cropped_frame)
            preds = uni_preds.add(var_preds).add(wei_preds) / 3
            preds = preds.cpu().detach().numpy()
            if save_cropped_path:
                drawed_cropped_img = draw_annotates(cropped_frame_copy,
                                                    np.array([[0, 0, 63, 63]]),
                                                    np.array([preds[0]]))
                absolute_name_img = img_name.split(".")[0]
                cropped_img_name = f"{absolute_name_img}_{box_index}.jpg"
                cv2.imwrite(os.path.join(save_cropped_path, cropped_img_name),
                            drawed_cropped_img)
            poses.append(preds[0])
            labeled_bboxs.append(bbox)
        poses = np.array(poses)
        labeled_bboxs = np.array(labeled_bboxs)
        drawed_img = draw_annotates(img, labeled_bboxs, poses)
        if save_imgs_path:
            cv2.imwrite(os.path.join(save_imgs_path, img_name), drawed_img)
    if save_video_path:
        generate_video(save_imgs_path, save_video_path)
def index():
    start = timeit.default_timer()
    # Load models
    classifiers = sentiment_classifiers.Asp_Sentiment_classifiers()
    stop = timeit.default_timer()
    print('Model load times: ', stop - start)
    if request.method == 'POST':
        file_ = request.form['file'] + ""
        # Locate the uploaded file on disk
        for root, dirs, files in os.walk('D:/William/Workspace/test2'):
            for name in files:
                if file_ in name:
                    direc1 = os.path.abspath(os.path.join(root, name))
        direc2 = direc1.replace("\\", "/")
        df = read_from_file(direc2)
        collection_name = 'details'
        print('Preprocessing...')
        reviews_df = preprocess(df)
        print('Classifying aspects...')
        reviews_df = classifiers.aspect_classifier(reviews_df, 0.1)
        print('Classifying sentiments...')
        reviews_df = classifiers.sentiments(reviews_df)
        print('Done!')
        delete_in_batch()
        details = stats.get_details(reviews_df)
        data_to_csv.save_file(details)
        inject('data/data_pred.csv', collection_name)
        # Get new data
        overall, aspect1, aspect2, aspect3, aspect4, _ = stats.get_chart_data(
            reviews_df)
        # Update
        detailer = {
            "overall": {"positive": overall[0], "neutral": overall[1],
                        "negative": overall[2]},
            "aspect1": {"positive": aspect1[0], "neutral": aspect1[1],
                        "negative": aspect1[2]},
            "aspect2": {"positive": aspect2[0], "neutral": aspect2[1],
                        "negative": aspect2[2]},
            "aspect3": {"positive": aspect3[0], "neutral": aspect3[1],
                        "negative": aspect3[2]},
            "aspect4": {"positive": aspect4[0], "neutral": aspect4[1],
                        "negative": aspect4[2]},
        }
        db.child("visualization").update(detailer)
        return redirect(url_for('visualization'))
    return render_template("home_page.html")
import os
import sys

from lib import solPatternGeneration as sPG
from lib.helper import generate_pos_tags_for_patterns as gen_pos
from lib import syntacticPatternGeneralization as syntacticPG
from utils import post_processing as pp
from pprint import pprint
# NOTE: `preprocess`, `tPG`, and `ngMine` are used below but their import lines
# were missing from this snippet; the module paths are assumed to follow the
# `lib`/`utils` layout above, e.g.:
# from utils.preprocess import preprocess
# from lib import textualPatternGeneration as tPG
# from lib import ngramMining as ngMine

if __name__ == "__main__":
    print("Program Started...")
    params = {
        'data_dir': '/'.join(sys.argv[1].split('/')[:-1]),
        'corpus_fn': sys.argv[1].split('/')[-1]
    }
    os.chdir(params['data_dir'])

    # Input data preprocessing
    corpus = preprocess(params)
    # corpus.print_info()

    print("Generating Textual Patterns")
    tPG.generate_textual_patterns(corpus)
    print("Done generating textual patterns.")

    print("Generating POS tags")
    gen_pos(corpus)
    print("Done generating POS tags.")

    print("Mining sequences")
    ngMine.generate_seqmining_dataset(corpus)
    ngMine.generate_frequent_ngrams(corpus, 2)
    print("Done Mining sequences.")
def __getitem__(self, index):
    index = index % len(self.ids)
    idxs = np.random.choice(self.ids_index[index], size=2, replace=False)
    img_path1 = self.base_dir / (self.ids[index] + '_%d.jpg' % idxs[0])
    img_path2 = self.base_dir / (self.ids[index] + '_%d.jpg' % idxs[1])
    # scale = np.random.random_sample() * 0.2 + 0.1
    scale = np.random.random_sample() * 0.2 + 1.4
    bbox1 = change_bbox(self.bboxs[index][idxs[0]], scale=scale, use_forehead=False)
    bbox2 = change_bbox(self.bboxs[index][idxs[1]], scale=scale, use_forehead=False)
    img1 = np.array(Image.open(img_path1).crop(bbox1))
    img2 = np.array(Image.open(img_path2).crop(bbox2))
    lbl1 = self.labels[index][idxs[0]]
    lbl2 = self.labels[index][idxs[1]]
    if self.use_bined:
        bined_label = self.euler_binned[index].copy()
        bined_lbl1 = bined_label[idxs[0]]
        bined_lbl2 = bined_label[idxs[1]]
    # ImageAugment (RandomBrightness, AddNoise...)
    if self.image_augmenter:
        augmented = self.image_augmenter(image=img1)
        img1 = augmented['image']
        augmented = self.image_augmenter(image=img2)
        img2 = augmented['image']
    # Resize (Scale & Pad & Crop)
    if self.resizer:
        resized = self.resizer(image=img1)
        img1 = resized['image']
        resized = self.resizer(image=img2)
        img2 = resized['image']
    # AffineAugment (Horizontal Flip, Rotate...)
    if self.affine_augmenter:
        augmented = self.affine_augmenter(image=img1)
        img1 = augmented['image']
        augmented = self.affine_augmenter(image=img2)
        img2 = augmented['image']
    # label = (lbl1 > lbl2) * 2 - 1
    label = np.sign(lbl1 - lbl2)
    if self.n_class == 4:
        lbl1 = euler2quat(*lbl1)
        lbl2 = euler2quat(*lbl2)
    if self.debug:
        print(label)
        return img1, img2
    else:
        img1 = preprocess(img1)
        img1 = torch.FloatTensor(img1).permute(2, 0, 1)
        img2 = preprocess(img2)
        img2 = torch.FloatTensor(img2).permute(2, 0, 1)
        label = torch.FloatTensor(label.astype(np.float32))
        lbl1 = torch.FloatTensor(lbl1)
        lbl2 = torch.FloatTensor(lbl2)
    if self.use_bined:
        return img1, img2, lbl1, lbl2, label, \
               int(bined_lbl1[0]), int(bined_lbl1[1]), int(bined_lbl1[2]), \
               int(bined_lbl2[0]), int(bined_lbl2[1]), int(bined_lbl2[2])
    else:
        return img1, img2, lbl1, lbl2, label
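# Usage sketch (assumed, not from the source): the per-component sign labels
# produced above fit torch.nn.MarginRankingLoss, which expects y = +1 when the
# first input should rank higher and y = -1 otherwise. Components where
# np.sign() returned 0 (ties) would need masking before the loss.
import torch.nn as nn

ranking_loss = nn.MarginRankingLoss(margin=0.1)
# pred1, pred2 = pose_model(img1), pose_model(img2)   # hypothetical regressor
# loss = ranking_loss(pred1, pred2, label)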
import json

import numpy as np
from keras.datasets import mnist  # assumed source of mnist.load_data()

from models.fully_connected_nn import build_model
from utils.preprocess import preprocess  # assumed module path for preprocess()

if __name__ == '__main__':
    # Load parameters
    with open("configs/default.json", mode="r") as f:
        params = json.load(f)
    num_classes = params["num_classes"]
    epochs = params["epochs"]
    batch_size = params["batch_size"]

    # Load datasets
    (X_train, y_train), (X_test, y_test) = mnist.load_data()

    # Preprocess data
    X_train, y_train, X_test, y_test = preprocess(X_train, y_train,
                                                  X_test, y_test, num_classes)

    # Split train & validation data
    X_train, X_valid = np.split(X_train, [50000])
    y_train, y_valid = np.split(y_train, [50000])

    # Build model
    model = build_model()

    # Fit model to the training data
    fit = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(X_valid, y_valid))
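    # A likely follow-up step (assumed, not in the source): evaluate on the test
    # split, assuming build_model() compiled the model with an accuracy metric.
    score = model.evaluate(X_test, y_test, verbose=0)
    print("test loss: {:.4f}, test accuracy: {:.4f}".format(score[0], score[1]))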
for training_filename in tqdm.tqdm(randomized_files):
    try:
        audio, _ = librosa.load(training_filename, sr=p.sr, mono=True)
        audio = audio.reshape(-1, 1)
        training_data = training_data + audio.tolist()
    except Exception:
        pass

training_data = np.array(training_data)
training_audio_length = len(training_data)
np.save('training_data', training_data)

logger.info('preprocessing..')
training_data = preprocess.preprocess(training_data, p.bs, p.fsz, p.fs)

logger.info('**training started**')
model = model.WaveNet(p.isz, p.nl, p.ks, p.dr, p.nf).model()
model.compile(loss='categorical_crossentropy', optimizer='adam')
if p.s:
    model.summary()
model.fit(training_data,
          epochs=p.epochs,
          steps_per_epoch=training_audio_length // 128,
          verbose=1,
          callbacks=[earlystopping_callback, tensorboard_callback])
model.save('src/trained_model/modelWN.h5')
logger.info("model saved in model/\nTraining finished successfully")
parser.add_argument('--train_file',
                    type=str,
                    default="data/data.csv",
                    help='train file path')
parser.add_argument('--test_file',
                    type=str,
                    default="data/test.csv",
                    help='test file path')
args = parser.parse_args()
train_file = args.train_file
test_file = args.test_file

# Import the data-set
df_train = pd.read_csv(train_file)
len_train = len(df_train)
df_test = pd.read_csv(test_file)
df = pd.concat([df_train, df_test])

# Preprocess the features
pre = preprocess(df)
df = pre.getData()
df_train = df[0:len_train - 1]
df_test = df[len_train:]

# Train and test
LR_model, RF_model = Train(df_train)
Test(df_test, LR_model, RF_model)
def visualize_AFLW2000(model, base_dir, target_size, filename, save_imgs_path):
    print("[INFO] Initing ALFW2000Dataset.")
    base_dir = Path(base_dir)
    img_paths = []
    bboxs = []
    labels = []
    pred_poses = []
    with open(base_dir / filename) as f:
        for i, line in enumerate(tqdm.tqdm(f.readlines()[:])):
            ls = line.strip()
            mat_path = base_dir / ls.replace('.jpg', '.mat')
            bbox, pose = get_pt_ypr_from_mat(mat_path, pt3d=True)
            # Skip extreme poses
            if abs(pose[0]) > 99 or abs(pose[1]) > 99 or abs(pose[2]) > 99:
                continue
            labels.append(np.array(pose))
            bboxs.append(bbox)
            img_paths.append(ls)
    resizer = albu.Compose([albu.SmallestMaxSize(target_size, p=1.),
                            albu.CenterCrop(target_size, target_size, p=1.)])
    for index in tqdm.tqdm(range(0, len(img_paths[:]))):
        img_path = base_dir / img_paths[index]
        img_name = os.path.basename(img_paths[index])
        bbox = change_bbox(bboxs[index], 2, use_forehead=False)
        raw_img = Image.open(img_path)
        img = np.array(raw_img.crop(bbox))
        img = img[:, :, ::-1]
        img = resizer(image=img)
        img = preprocess(np.array(img['image']))
        # Convert to tensor and transform to [-1, 1]
        img = torch.FloatTensor(img).permute(2, 0, 1).unsqueeze_(0)
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        img = img.to(device)  # .to() is not in-place for tensors
        pred = model(img)
        pred = pred.cpu().detach().numpy()
        pred_poses.append(pred[0])
        if save_imgs_path:
            b, g, r = raw_img.split()
            raw_img = Image.merge("RGB", (r, g, b))
            drawed_img = draw_annotates(np.array(raw_img), np.array([bbox]),
                                        np.array([pred[0]]))
            cv2.imwrite(os.path.join(save_imgs_path, img_name), drawed_img)
    pred_poses = np.array(pred_poses)
    labels = np.array(labels)
    # Mean absolute error per image, averaged over yaw/pitch/roll
    delta = np.absolute(labels - pred_poses)
    delta = np.sum(delta, axis=1) / 3
    sortted_delta = np.sort(delta)
    index_of_max_delta = [np.where(delta == value_delta)
                          for value_delta in sortted_delta[-5:]]
    index_of_min_delta = [np.where(delta == value_delta)
                          for value_delta in sortted_delta[:5]]
    max_delta_dir = os.path.join(save_imgs_path, "max_delta")
    os.system(f"rm -rf \"{max_delta_dir}\"")
    min_delta_dir = os.path.join(save_imgs_path, "min_delta")
    os.system(f"rm -rf \"{min_delta_dir}\"")
    # Copy the five worst and five best predictions into separate folders
    for index in index_of_max_delta:
        Path(max_delta_dir).mkdir(parents=True, exist_ok=True)
        img_name = os.path.basename(img_paths[index[0][0]])
        path_drawed_img = os.path.join(save_imgs_path, img_name)
        os.system(f"cp -i \"{path_drawed_img}\" \"{max_delta_dir}\"")
    for index in index_of_min_delta:
        Path(min_delta_dir).mkdir(parents=True, exist_ok=True)
        img_name = os.path.basename(img_paths[index[0][0]])
        path_drawed_img = os.path.join(save_imgs_path, img_name)
        os.system(f"cp -i \"{path_drawed_img}\" \"{min_delta_dir}\"")
def main():
    df = pd.read_table(DATA_PATH)
    df = preprocess(df)
    model, score = train(df)
    model_name = save_model(model, score)
    save_plotimage(model_name)
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
import pickle
from utils.preprocess import preprocess

SRC, TRG, train_iter, val_iter = preprocess(0, 64)
print('Data loaded.')


def load_embeddings(path, TEXT, embedding_dim=300):
    """
    Creates an embedding matrix from a file containing words and their vectors
    separated by spaces.
    Modified from
    https://github.com/A-Jacobson/CNN_Sentence_Classification/blob/master/WordVectors.ipynb
    """
    with open(path) as f:
        embeddings = np.zeros((len(TEXT.vocab), embedding_dim))
        for i, line in enumerate(f.readlines()):
            values = line.split()
            word = values[0]
            if word in TEXT.vocab.stoi:
                index = TEXT.vocab.stoi[word]
                try:
                    vector = np.array(values[1:], dtype='float32')
                except ValueError:
                    vector = np.array([0] * embedding_dim, dtype='float32')
                    print('error: ', word)
                embeddings[index] = vector
            if i % 10000 == 0:
                print('{i} complete'.format(i=i))
    return embeddings


# Save German embeddings
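# Usage sketch (assumed; the vector file path and TEXT field are illustrative):
# the returned matrix can seed an nn.Embedding layer for the encoder.
# emb_matrix = load_embeddings('vectors/wiki.de.vec', SRC)
# embedding = nn.Embedding.from_pretrained(torch.FloatTensor(emb_matrix), freeze=False)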
def make_predictions(data_raw, data_name, regression_model):
    """
    Makes predictions with the NN and LR models.
    :param data_raw: input data, not yet preprocessed
    :param data_name: name of the data set
    :param regression_model: initialized model for the regression task
    :return:
    """
    # Define column names in the data. Usually this would be a console input.
    key_columns = '(Opportunity_Name,Product)'
    update_col = 'Upload_date'
    created_col = 'Created'
    opp_name_col = 'Opportunity_Name'
    product_name_col = 'Product'
    key_columns = key_columns[1:-1].split(',')
    target = 'future stage'                    # target for the first part --> stage Won or Lost
    target_second_part = 'time_diff_to_close'  # target for the second part --> time till closing

    # Model parameters
    nn_activation_list = ['identity', 'logistic', 'tanh', 'relu']
    nn_solver_list = ['lbfgs', 'sgd', 'adam']
    nn_nodes_list = ['(2, 2, 2)', '(100, 100)', '(20, 16, 10, 4)', '(100, 80, 60, 40)']
    lr_solver_list = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
    c_list = [x * .1 for x in range(1, 11)]

    # Preprocess data
    X, y, index_train, index_test, second_part, y_proba_guessed, updates, data_won = preprocess(
        data_raw, target, key_columns, update_col, created_col,
        opp_name_col, product_name_col)

    # Train the periods-prediction model first, because the same model is reused each time
    train_regression_model(regression_model, second_part, index_train, target_second_part)

    X_train = X.loc[index_train]
    X_test = X.loc[index_test]
    y_train = y.loc[index_train]
    y_test = y.loc[index_test]

    y_guessed = [1 if x >= 0.5 else 0 for x in y_proba_guessed]
    print('Guessed probabilities'.upper())
    print(classification_report(y_test, y_guessed))
    print('Confusion matrix'.upper())
    print(confusion_matrix(y_test, y_guessed))
    ns_auc = roc_auc_score(y_test, y_proba_guessed)
    print('No Skill: ROC AUC=%.3f' % ns_auc)

    # Grid search over the neural-net hyperparameters
    best_auc, best_mae_weighted, best_mae_unweighted, best_model_auc, \
        best_model_mae_weighted, best_model_mae_unweighted = initialize_metrics()
    for nn_activation in nn_activation_list:
        for nn_solver in nn_solver_list:
            for nn_nodes in nn_nodes_list:
                nn = NeuralNetModel(X_train, X_test, y_train, y_test, index_test,
                                    index_train, second_part, target, update_col,
                                    y_proba_guessed, updates, data_won, data_name,
                                    regression_model)
                nn.define_model(solver=nn_solver, activation=nn_activation,
                                n_nodes=nn_nodes)
                auc, mae_guessed, mae_weighted, mae_unweighted, model = nn.fit_predict()
                if auc > best_auc:
                    best_auc = auc
                    best_model_auc = model
                if mae_weighted < best_mae_weighted:
                    best_mae_weighted = mae_weighted
                    best_model_mae_weighted = model
                if mae_unweighted < best_mae_unweighted:
                    best_mae_unweighted = mae_unweighted
                    best_model_mae_unweighted = model
    print('best nn model by AUC: '.upper() + best_model_auc.upper()
          + " with AUC {:.2f}".format(best_auc))
    print('best guessed revenue MAE: '.upper() + '{:.2f}'.format(mae_guessed))
    print('best nn model by predicted revenue MAE: '.upper()
          + best_model_mae_weighted.upper()
          + " with MAE {:.2f}".format(best_mae_weighted))
    print('best nn model by strictly predicted revenue MAE: '.upper()
          + best_model_mae_unweighted.upper()
          + " with MAE {:.2f}".format(best_mae_unweighted))

    # Grid search over the logistic-regression hyperparameters
    best_auc, best_mae_weighted, best_mae_unweighted, best_model_auc, \
        best_model_mae_weighted, best_model_mae_unweighted = initialize_metrics()
    for lr_solver in lr_solver_list:
        for c in c_list:
            lr = LogRegModel(X_train, X_test, y_train, y_test, index_test,
                             index_train, second_part, target, update_col,
                             y_proba_guessed, updates, data_won, data_name,
                             regression_model)
            lr.define_model(solver=lr_solver, c=c)
            auc, mae_guessed, mae_weighted, mae_unweighted, model = lr.fit_predict()
            if auc > best_auc:
                best_auc = auc
                best_model_auc = model
            if mae_weighted < best_mae_weighted:
                best_mae_weighted = mae_weighted
                best_model_mae_weighted = model
            if mae_unweighted < best_mae_unweighted:
                best_mae_unweighted = mae_unweighted
                best_model_mae_unweighted = model
    print('best lr model by AUC: '.upper() + best_model_auc.upper()
          + " with AUC {:.2f}".format(best_auc))
    print('best guessed revenue MAE: '.upper() + '{:.2f}'.format(mae_guessed))
    print('best lr model by predicted revenue MAE: '.upper()
          + best_model_mae_weighted.upper()
          + " with MAE {:.2f}".format(best_mae_weighted))
    print('best lr model by strictly predicted revenue MAE: '.upper()
          + best_model_mae_unweighted.upper()
          + " with MAE {:.2f}".format(best_mae_unweighted))
from tqdm import tqdm
import torch

from Mem2Seq import Mem2Seq
from utils.preprocess import preprocess
from utils.vocab import CustomVocab, merge_bpe
from config import config

device = torch.device(config['device'])
vocab, train_loader, valid_loader, test_loader, max_s, max_r = preprocess(
    config['vocab_path'], config['codes_path'], config['train_datasets'],
    config['valid_datasets'], config['test_datasets'],
    config['train_batch_size'], config['valid_batch_size'], device)
model = Mem2Seq(hidden_size=config['hdd'], n_layers=config['layers'],
                max_s=max_s, max_r=max_r, vocab=vocab,
                load_path=config['load_path'], save_path=config['save_path'],
                lr=config['lr'], dr=config['dr'],
                position=config['position'], device=device)

print('Version {} - Training starts with model of {} layers, {} hdd, {} lr, '
      '{} dr, {} tr, {} temp, {} clip, {} position and {} batch size'.format(
          config['version'], config['layers'], config['hdd'], config['lr'],
          config['dr'], config['tr'], config['temp'], config['clip'],
          config['position'], config['train_batch_size']))

valid_iter = iter(valid_loader)
decay_cnt = 0
decay_num = 0
loss_best = float('inf')
loss_prev = float('inf')
for epoch in range(1, 1 + config['epochs']):
    # Run the train function
    pbar = tqdm(enumerate(train_loader), total=len(train_loader))
    for i, data in pbar:
        model.train_batch(data, config['clip'], config['tr'], i == 0)
        pbar.set_description('<epoch {}> '.format(epoch) + model.print_loss())
    loss, bleu, f1, perplexity, perplexity_m = model.evaluate(valid_loader,
                                                              config['temp'])
    print('BLEU: {:.3f}, F1: {:.3f}, Perplexity: {:.3f}, Masked Perplexity: {:.3f}'.format(
        bleu, f1, perplexity, perplexity_m))
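# The decay_cnt / decay_num / loss_best / loss_prev bookkeeping above is never
# updated in this excerpt; a common decay-on-plateau rule it suggests (assumed,
# not the author's exact logic) would run once per epoch after evaluate():
# if loss >= loss_prev:
#     decay_cnt += 1
# if decay_cnt >= 3 and decay_num < 5:
#     decay_cnt, decay_num = 0, decay_num + 1
#     # halve the learning rate on the model's optimizer here
# loss_prev = loss
# loss_best = min(loss_best, loss)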
def main(model_name):
    df = pd.read_table(DATA_PATH)
    df = preprocess(df)
    model = load_model(model_name)
    predict_list = prediction(model, df)
    make_submit_tsv(predict_list)
def testing():
    start = timeit.default_timer()
    # Load models
    classifiers = sentiment_classifiers.Asp_Sentiment_classifiers()
    stop = timeit.default_timer()
    print('Model load times: ', stop - start)
    if request.method == 'POST':
        if request.form['submit'] == 'submit':
            sentence1 = request.form['sentence1'] + ""
            sentence2 = request.form['sentence2'] + ""
            sentence3 = request.form['sentence3'] + ""
            sentence4 = request.form['sentence4'] + ""
            sentence5 = request.form['sentence5'] + ""
            df = read_from_sentences(sentence1, sentence2, sentence3,
                                     sentence4, sentence5)
            collection_name = 'details'
            print('Preprocessing...')
            reviews_df = preprocess(df)
            print('Classifying aspects...')
            reviews_df = classifiers.aspect_classifier(reviews_df, 0.06)
            print('Classifying sentiments...')
            reviews_df = classifiers.sentiments(reviews_df)
            print('Done!')

            # Detail page
            details_data = stats.get_details(reviews_df)
            data_to_csv.save_file(details_data)
            inject('data/data_pred.csv', collection_name)

            # Update the testing page with the user input
            s1 = ["", "", "", "", ""]
            for i in range(len(details_data)):
                s1[i] = details_data['sentence'][i]
            det = {
                "userinput": {
                    "sentence1": s1[0],
                    "sentence2": s1[1],
                    "sentence3": s1[2],
                    "sentence4": s1[3],
                    "sentence5": s1[4]
                }
            }
            db.child("testing").update(det)

            # Get new data (update the testing page only)
            overall, aspect1, aspect2, aspect3, aspect4, _ = stats.get_chart_data(
                reviews_df)
            detailer1 = {
                "overall": {"positive": overall[0], "neutral": overall[1],
                            "negative": overall[2]},
                "aspect1": {"positive": aspect1[0], "neutral": aspect1[1],
                            "negative": aspect1[2]},
                "aspect2": {"positive": aspect2[0], "neutral": aspect2[1],
                            "negative": aspect2[2]},
                "aspect3": {"positive": aspect3[0], "neutral": aspect3[1],
                            "negative": aspect3[2]},
                "aspect4": {"positive": aspect4[0], "neutral": aspect4[1],
                            "negative": aspect4[2]},
            }
            db.child("testing").update(detailer1)

            # Update all pie-chart data: get the existing counts
            ref = db.child('visualization').get()
            a1 = ref.each()[0].val()
            a2 = ref.each()[1].val()
            a3 = ref.each()[2].val()
            a4 = ref.each()[3].val()
            ov = ref.each()[4].val()
            # Combine with the new counts
            overall = [overall[0] + ov['positive'],
                       overall[1] + ov['neutral'],
                       overall[2] + ov['negative']]
            aspect1 = [aspect1[0] + a1['positive'],
                       aspect1[1] + a1['neutral'],
                       aspect1[2] + a1['negative']]
            aspect2 = [aspect2[0] + a2['positive'],
                       aspect2[1] + a2['neutral'],
                       aspect2[2] + a2['negative']]
            aspect3 = [aspect3[0] + a3['positive'],
                       aspect3[1] + a3['neutral'],
                       aspect3[2] + a3['negative']]
            aspect4 = [aspect4[0] + a4['positive'],
                       aspect4[1] + a4['neutral'],
                       aspect4[2] + a4['negative']]
            # Update
            detailer = {
                "overall": {"positive": overall[0], "neutral": overall[1],
                            "negative": overall[2]},
                "aspect1": {"positive": aspect1[0], "neutral": aspect1[1],
                            "negative": aspect1[2]},
                "aspect2": {"positive": aspect2[0], "neutral": aspect2[1],
                            "negative": aspect2[2]},
                "aspect3": {"positive": aspect3[0], "neutral": aspect3[1],
                            "negative": aspect3[2]},
                "aspect4": {"positive": aspect4[0], "neutral": aspect4[1],
                            "negative": aspect4[2]},
            }
            db.child("visualization").update(detailer)
            return render_template("testing.html")
    return render_template("testing.html")
        curr_loss = curr_loss + loss.item()
        if iteration % plot_every == 0:
            all_loss.append(curr_loss / plot_every)
            curr_loss = 0
        iteration += 1
    model.eval()
    torch.save(model.state_dict(), "bertClassifier.pt")
    with open('loss.txt', 'w') as filehandle:
        for loss in all_loss:
            filehandle.write('%s\n' % loss)


if __name__ == "__main__":
    dataset_name = "train.csv"
    encodings = preprocess.preprocess(dataset_name)
    train_dataset = RealOrNotDataset(encodings)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = BertClassifier()
    model.to(device)
    optimizer = AdamW(model.parameters(), lr=3e-5)
    dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)
    num_epoches = 4
    total_steps = len(dataloader) * num_epoches
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0,
                                                num_training_steps=total_steps)
    train(encodings, dataloader, optimizer, scheduler, model, device, num_epoches)
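# Note (not from the source): with num_warmup_steps=0 the schedule above decays
# the learning rate linearly from 3e-5 to 0 over total_steps; transformers'
# schedulers expect scheduler.step() to be called once per optimizer step
# inside train().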
from utils.preprocess import preprocess
from utils.helpers import clean_params
import pandas as pd
import numpy as np
import yaml
from hyperopt import hp, fmin, tpe, space_eval, Trials, STATUS_OK
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from sklearn.metrics import roc_auc_score
import h2o

### Preprocessing
train, valid, y_var, X_vars, test, _ = preprocess(test_set=True,
                                                  test_size=0.15,
                                                  pca=False)

h2o.init()
t = h2o.H2OFrame(train)
v = h2o.H2OFrame(valid)
_p = h2o.H2OFrame(_)
t[y_var] = t[y_var].asfactor()
v[y_var] = v[y_var].asfactor()
_p[y_var] = _p[y_var].asfactor()

### Hyperparameter optimization
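# A minimal continuation sketch (assumed; not the author's actual search space):
# tune the H2O GBM with hyperopt's TPE using the imports above.
def objective(params):
    gbm = H2OGradientBoostingEstimator(**params)
    gbm.train(x=X_vars, y=y_var, training_frame=t, validation_frame=v)
    return {'loss': -gbm.auc(valid=True), 'status': STATUS_OK}

space = {
    'ntrees': hp.choice('ntrees', [50, 100, 200]),
    'max_depth': hp.choice('max_depth', [3, 5, 7]),
    'learn_rate': hp.uniform('learn_rate', 0.01, 0.2),
}
best = fmin(objective, space, algo=tpe.suggest, max_evals=30, trials=Trials())
print(space_eval(space, best))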
def train(server=None):
    # Read the training set
    train_data = preprocess(FLAGS.train_files.split(','),
                            n_input, n_context, alphabet)
    # Read the validation set
    # dev_data = preprocess(FLAGS.dev_files.split(','),
    #                       n_input, n_context, alphabet)

    # Generate feeding data for the training set
    train_iters = model_feeder(train_data,
                               batch_size=FLAGS.train_batch_size,
                               feeder_name='train_batch')
    # Generate feeding data for the validation set
    # dev_iters = model_feeder(dev_data,
    #                          batch_size=FLAGS.dev_batch_size,
    #                          feeder_name='dev_batch')

    (X_length_mb, X_indices_mb, Y_length_mb,
     Y_input_indices_mb, Y_target_indices_mb) = train_iters.get_next()

    sim_asr = SimpleASR(s_len=X_length_mb,
                        s_indices=X_indices_mb,
                        t_len=Y_length_mb,
                        t_input_indices=Y_input_indices_mb,
                        t_output_indices=Y_target_indices_mb,
                        n_of_classes=n_character,
                        t_dict=alphabet)
    opt = tf.train.AdamOptimizer(learning_rate=0.001)
    training_op = opt.minimize(loss=sim_asr.loss)

    sess = tf.Session(config=session_config)
    sess.run(tf.global_variables_initializer())

    print('Start training')
    print('==> total epochs : {}'.format(n_epoch))
    for epoch in range(n_epoch):
        avg_tr_loss = 0
        tr_step = 0
        sess.run(train_iters.initializer)
        # FIXME: the batch count should be derived automatically from the
        # epoch and the amount of data.
        # for step in range(n_batch):
        for step in range(164):
            # For debugging the AttASR or LargeAttASR class:
            # _, tr_loss, dec_out, enc_out, dec_in, dec_toks, dec_target_idx = sess.run(
            #     fetches=[training_op, sim_asr.loss, sim_asr._tr_outputs,
            #              sim_asr.enc_outputs, sim_asr.t_batch,
            #              sim_asr.tr_tokens, sim_asr._t_output_indices])
            # For debugging the SimpleASR or LargeSimpleASR class:
            _, tr_loss, dec_out, enc_out, dec_in, dec_toks, dec_target_idx = sess.run(
                fetches=[training_op, sim_asr.loss, sim_asr._tr_outputs,
                         sim_asr.concatenated_enc_state, sim_asr.t_batch,
                         sim_asr.tr_tokens, sim_asr._t_output_indices])
            # _, tr_loss = sess.run(fetches=[training_op, sim_asr.loss])
            avg_tr_loss += tr_loss
            tr_step += 1
            print('==> epoch : {}, step : {}, tr_loss : {:.3f}'.format(
                epoch + 1, tr_step, tr_loss))
        avg_tr_loss /= tr_step
        print('epoch : {}, avg_tr_loss : {:.3f}'.format(epoch + 1, avg_tr_loss))
    print('Finished')
def main():
    parser = ArgumentParser()
    action_parser = parser.add_subparsers(title="actions",
                                          dest="action",
                                          required=True,
                                          help="select action to execute")

    # Args for preprocessing
    preprocess_parser = action_parser.add_parser("preprocess",
                                                 help="preprocess data")
    preprocess_parser.add_argument(
        "-r", "--root-dir", dest="root_dir", required=True,
        help="root directory of the common voice dataset")

    # Args for feature extraction
    feature_extractor_parser = action_parser.add_parser(
        "feature_extractor", help="feature extractor")
    feature_extractor_parser.add_argument(
        "-r", "--root-dir", dest="root_dir", required=True,
        help="root directory of the common voice dataset")

    # Args for training
    training_parser = action_parser.add_parser("train", help="Train the model")
    training_parser.add_argument(
        "-r", "--root-dir", dest="root_dir", required=True,
        help="root directory of the common voice dataset")
    training_parser.add_argument(
        "-m", "--model-name", dest="model_key", required=True,
        help="key to determine the model to be trained")

    # Args for testing
    test_parser = action_parser.add_parser("test", help="Test the model")
    test_parser.add_argument(
        "-r", "--root-dir", dest="root_dir", required=True,
        help="root directory of the common voice dataset")
    test_parser.add_argument(
        "-m", "--model-name", dest="model_key", required=True,
        help="key to determine the model to be tested")
    test_parser.add_argument(
        "-c", "--checkpoint-dir", dest="checkpoint_path", required=True,
        help="root directory of the saved models")

    # Args for inference
    inference_parser = action_parser.add_parser(
        "inference", help="Run inference on the model")
    inference_parser.add_argument(
        "-r", "--root-dir", dest="root_dir", required=True,
        help="root directory of the audio files")
    inference_parser.add_argument(
        "-m", "--model-path", dest="model_path", required=True,
        help="path of the model")

    action, args = clean_args(parser.parse_args())
    if action == 'preprocess':
        preprocess(**args)
    elif action == 'feature_extractor':
        extract_features(**args)
    elif action == 'train':
        train_model(**args)
    elif action == 'test':
        test_model(**args)
    elif action == 'inference':
        inference(**args)
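# Example invocations (the script and model names are illustrative):
#   python main.py preprocess -r /data/common_voice
#   python main.py train -r /data/common_voice -m baseline
#   python main.py test -r /data/common_voice -m baseline -c checkpoints/
#   python main.py inference -r /data/audio -m models/best.pt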
stat_filename = "result_" + experiment_name + ".csv"
STATS_FILE = os.path.join(STATS_DIR, stat_filename)
DICE_METRICS_FILE = os.path.join(STATS_DIR,
                                 "detailed_dice_" + experiment_name + ".csv")

######################## LOAD MODEL ########################
model = load_model(model_filename, custom_objects=custom_losses)

######################## PREPROCESSING ########################
src_dir, filename = os.path.split(results.INFILE)
preprocess.preprocess(filename,
                      src_dir=src_dir,
                      dst_dir=PREPROCESSING_DIR,
                      tmp_dir=TMPDIR,
                      verbose=0,
                      skullstrip_script_path=SKULLSTRIP_SCRIPT_PATH,
                      remove_tmp_files=True)

######################## SEGMENT FILE ########################
# Load the NIfTI file data
nii_obj = nib.load(os.path.join(PREPROCESSING_DIR, filename))
nii_img = nii_obj.get_data()
header = nii_obj.header
affine = nii_obj.affine

# Reshape to account for the implicit "1" channel
nii_img = np.reshape(nii_img, nii_img.shape + (1,))