def test_host_uuid_from_cernvm_config(self):
    """Test host uuid is from the cernvm file."""
    new_uuid = str(uuid4())
    content = "CERNVM_UUID={0}".format(new_uuid)
    write_to_file(self.cernvmconf_fp, content)

    self.init_hw_info()

    self.assertEqual(self.hardware_info.host_uuid, new_uuid)
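# Every snippet in this collection leans on a write_to_file helper whose
# signature varies by codebase: the test suite calls write_to_file(path, content),
# while the training scripts below call write_to_file(content, log_file_path, mode=...).
# A minimal sketch of the test-suite variant (the append default is an assumption):
def write_to_file(filepath, contents, mode="a"):
    """Sketch: write `contents` to `filepath`; mode='w' truncates first."""
    with open(filepath, mode) as f:
        f.write(contents)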
def __init__(self, hgain=0.01, sgain=0.3, vgain=0.3, p=0.5):
    print('using AugmentHSV !')
    write_to_file('using AugmentHSV !', log_file_path)
    self.p = p
    self.hgain = hgain
    self.sgain = sgain
    self.vgain = vgain
def replicate_production_environment(self):
    """Mock the environment with temp files, solving dependencies."""
    tempfile.tempdir = tempfile.mkdtemp(dir="/tmp")

    # Mock cernvm environment
    _, self.uuid_fp = tempfile.mkstemp()
    self.config["uuid_file"] = self.uuid_fp
    _, self.cernvmconf_fp = tempfile.mkstemp()
    self.config["local_cernvm_config"] = self.cernvmconf_fp

    # Mock dumbq environment
    self.dumbq_dir = tempfile.mkdtemp()
    self.config["dumbq_dir"] = self.dumbq_dir
    self.www_dir = tempfile.mkdtemp()
    self.config["www_dir"] = self.www_dir
    self.setup_config_files()

    # Bind mount directories
    _, self.shared = tempfile.mkstemp()
    _, self.guest = tempfile.mkstemp()
    self.config["shared_meta_file"] = self.shared + "=" + self.guest

    # Set up with a few environment variables
    _, self.envvar_file = tempfile.mkstemp()
    self.config["envvar_file"] = self.envvar_file
    self.envvar_floppy = "GREETING", "hello"
    self.envvar = "FAREWELL", "goodbye"
    write_to_file("/dev/fd0", "=".join(self.envvar_floppy))
    write_to_file(self.envvar_file, "=".join(self.envvar))

    # Tty enabled
    self.config["base_tty"] = 2
def upload_diag(request):
    """
    Upload a diagnosis file.
    :param request:
    :return: file contents
    """
    if request.method == "POST":  # handle POST requests only
        myFile = request.FILES.get("myfile", None)  # uploaded file, None if missing
        if not myFile:  # check before touching myFile.name, which would raise on None
            return HttpResponse("Please upload a file!")
        upload_filename = utils.random_string() + "." + myFile.name.split(".")[-1]
        ext = upload_filename.split(".")[-1]
        utils.write_to_file(myFile,
                            os.path.join(utils.DIR_UPLOADS, upload_filename),
                            ext)
        mongod.insert_diag_file({"code": upload_filename, "file": myFile.name})
        files = load_files()
        return HttpResponse(json.dumps({
            "diag_files": files,
            "diag": load_diag(upload_filename)
        }), content_type='application/json')
def upload_file(request):
    """
    Upload a file for word segmentation labeling.
    :param request:
    :return:
    """
    if request.method == "POST":  # handle POST requests only
        myFile = request.FILES.get("myfile", None)  # uploaded file, None if missing
        if not myFile:  # check before touching myFile.name, which would raise on None
            return HttpResponse("Please upload a file!")
        upload_filename = utils.random_string() + "." + myFile.name.split(".")[-1]
        ext = upload_filename.split(".")[-1]
        utils.write_to_file(myFile,
                            os.path.join(utils.DIR_UPLOADS, upload_filename),
                            ext)
        if ext == "txt" or ext == "csv":
            with open(os.path.join(utils.DIR_UPLOADS, upload_filename)) as f:
                total = len(f.readlines())
        elif ext == "xls" or ext == "xlsx":
            total = 0
            wb = xlrd.open_workbook(
                os.path.join(utils.DIR_UPLOADS, upload_filename))
            for k in range(len(wb.sheets())):
                ws = wb.sheet_by_index(k)
                total += ws.nrows
        else:
            total = 0  # unknown extension; `total` was unbound here in the original
        request.session[utils.SESSION_FILE] = upload_filename
        date = time.strftime('%Y-%m-%d', time.localtime(time.time()))

        # Insert the file info into the database
        file_dict = {
            dbinfo.FILE_FILE: myFile.name,
            dbinfo.FILE_CODE: upload_filename,
            dbinfo.FILE_TOTAL: total,
            dbinfo.FILE_DATE: date,
            dbinfo.FILE_CHECKED: 0
        }
        utils.get_database(
            request.session.get(utils.SESSION_DB, "")).insert_file(file_dict)

        # Write the upload event to the log
        utils.logger_file_info(request.session.get(utils.SESSION_USER, ""),
                               "upload segmentation labeling file",
                               request.session.get(utils.SESSION_DB, ""),
                               myFile.name)
        request.session[utils.SESSION_ORIGIN_FILE] = myFile.name
    return HttpResponse("")
def convert_labels_to_registers(intermediate_code_with_labels: str,
                                output_filename='unnamed_executable',
                                save_intermediate_code=False) -> None:
    labels_dest = dict()
    labels_to_replace = dict()
    intermediate_code_with_labels = intermediate_code_with_labels + 'HALT\n'

    if save_intermediate_code:
        write_to_file(output_filename + '.intermediate',
                      intermediate_code_with_labels)

    lines = intermediate_code_with_labels.splitlines(keepends=True)
    data: str = ''
    line_counter = 0
    for line in lines:
        if re.match(r'^(##).*\n', line):  # skip comment lines
            continue
        if re.match(r'^\n', line):  # skip empty lines
            continue
        if line.startswith('%label_'):
            labels_dest[line[1:-1]] = line_counter  # record the label's destination
        else:
            line_counter = line_counter + 1
            if re.match(r'.+%label_.*\n', line):
                r = re.search(r'.+%(label_[0-9]+).*\n', line)
                if r:
                    lbl = r.group(1)
                    labels_to_replace[lbl] = line_counter
                else:
                    raise Exception('No valid label in line.')
            data = data + line

    for label, reg in labels_dest.items():
        regex = rf'%{label}$'
        data = re.sub(regex, str(reg), data, flags=re.MULTILINE)
        labels_to_replace[label] = None

    if all(map(lambda x: x is None, labels_to_replace.values())):
        print(
            "[[[ INFO ]]]: Label converter: All labels, found as needing to be "
            "replaced, were replaced successfully."
        )
    else:
        raise Exception(
            f'Not all labels were replaced! Labels left: '
            f'{list(filter(lambda x: labels_to_replace[x] is not None, labels_to_replace.keys()))}'
        )

    data = re.sub(r'^(##).*\n', '', data, flags=re.MULTILINE)
    # print('_____________________________________________________')
    with open(output_filename, 'w', newline='\n') as file2:
        file2.write(data)
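# A hypothetical usage sketch (the label syntax is inferred from the regexes
# above: '%label_N' alone on a line marks a destination, '%label_N' at the end
# of an instruction is a jump target to be replaced):
sample = (
    '## intermediate code with labels\n'
    'JZERO a %label_0\n'
    'INC a\n'
    '%label_0\n'
)
convert_labels_to_registers(sample, output_filename='demo_executable')
# 'demo_executable' now contains 'JZERO a 2\nINC a\nHALT\n' -- each label
# resolves to the count of instructions emitted before it (here 2).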
def submit_form():
    # error = None
    if request.method == 'POST':
        try:
            data = request.form.to_dict()
            print(data)
            write_to_file(data)
            write_to_csv(data)
            msg = 'I\'ll get in touch with you shortly'
        except Exception as err:
            msg = 'Error saving to database'
    else:
        msg = 'Something went wrong'
    return render_template('thank_you.html', message=msg)
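# write_to_csv is not defined in this snippet; a minimal sketch, assuming each
# submission dict shares the same keys and a hypothetical 'submissions.csv' path:
import csv
import os

def write_to_csv(data, csv_path='submissions.csv'):
    """Append one form submission as a CSV row (sketch, not the original helper)."""
    file_exists = os.path.isfile(csv_path)
    with open(csv_path, 'a', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=sorted(data.keys()))
        if not file_exists:
            writer.writeheader()  # write the header once, on first creation
        writer.writerow(data)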
def check_file(request):
    """
    Check whether the uploaded data conforms to the expected format.
    :param request:
    :return:
    """
    try:
        upload_filename = request.FILES.get("myfile", None)
        name = upload_filename.name
        ext = name.split(".")[-1]
        utils.write_to_file(upload_filename, "tmp.csv", ext)
        all_categories = utils.get_database(
            request.session.get(utils.SESSION_DB, "")).get_categories()
        error_data, error_type, duplicate_data = [], [], {}
        i = 0
        # Validate the uploaded annotations line by line
        for line in open("tmp.csv").readlines():
            line = line.strip()
            if len(line) > 1:
                if len(line.split("\t")) != 2:
                    error_data.append(line)  # malformed data
                else:
                    seg, sug = line.split("\t")
                    if sug in all_categories:  # new annotations must belong to existing categories
                        res, old_sug = utils.get_database(
                            request.session.get(utils.SESSION_DB,
                                                "")).is_sug_exist(seg, sug)
                        if res:
                            duplicate_data[seg] = [sug, old_sug]  # annotation already exists
                        i += 1
                    else:
                        error_type.append(line)  # annotation category does not exist
            else:
                error_data.append(line)
        data = {
            'error': error_data,
            'types': error_type,
            'duplicate': duplicate_data
        }
    except Exception as e:  # Python 3 syntax (was Python 2 `except Exception, e`)
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        output = ",".join([str(e), fname, str(exc_tb.tb_lineno)])
        with open("exp.txt", "w") as f:  # close the handle (the original leaked it)
            f.write(output)
def init_project_hub(self, config_content=None, pref_content=None):
    """Init project hub with its dependencies."""
    self.init_dumbq_setup()
    self.dumbq_setup.basic_setup()
    self.dumbq_setup.setup_dumbq_folders()
    self.dumbq_setup.setup_logger()
    self.dumbq_setup.setup_public_www()

    config_content = config_content or self.valid_config_content
    pref_content = pref_content or ""
    self.pref_config_source = config["preference_config_source"]
    write_to_file(self.config_source, config_content)
    write_to_file(self.pref_config_source, pref_content)

    self.run_dir = self.config["dumbq_rundir"]
    self.project_hub = ProjectHub(self.config, self.feedback, self.logger)
def main(update, hammer):
    bot = Binance()
    time_range = 8
    base_coin = update or 'USDT'
    interval = hammer or '15m'
    _file_path_hammer = assemble_file_path(FILE_PATH_HAMMER, interval)
    coin_pairs = bot.get_coin_pairs(base_coin)
    for coin_pair in coin_pairs:
        print(f'Analyzing {coin_pair} coin pair...')
        try:
            kline_data = bot.get_candlestick_data(symbol=coin_pair,
                                                  interval=interval)
        except NotEnoughArguments as e:
            print(e)
            continue  # kline_data would be unbound below otherwise
        candles = [Candlestick(candlestick_idx, data, interval)
                   for candlestick_idx, data in enumerate(kline_data)]
        print(f'COIN PAIR: {coin_pair}')
        for c in candles:
            res = c.get_resistance_area(candles)
            supp = c.get_support_area(candles)
            if supp:
                print('\nSUPPORT')
                pp(c.get_data())
            if res:
                print('\nRESISTANCE')
                pp(c.get_data())
                exit()  # debug exit: stops the script at the first resistance hit

        hammers = {}
        for candle in candles:
            try:
                if candle.is_hammer() and candle.is_in_timerange(time_range):
                    if not hammers.get(coin_pair):
                        hammers[coin_pair] = []
                    hammers[coin_pair].append(candle.get_data())
            except ZeroDivisionError:
                print(f'ZeroDivisionError for coin pair: {coin_pair}')
                pp(candle.get_data())
        if hammers:
            was_writing_successful = write_to_file(
                json.dumps(hammers, indent=4), _file_path_hammer)
            print(f'Writing into file {_file_path_hammer} was successful.'
                  if was_writing_successful else
                  f'Something went wrong while writing into file {_file_path_hammer}.')
        else:
            print(f'For coin pair {coin_pair} hammer was not found.')
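# A hypothetical invocation (the parameter names suggest CLI options; passing
# None falls back to the USDT base coin and the 15m interval):
if __name__ == '__main__':
    main(None, None)  # scan all USDT pairs on the 15m interval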
def __init__(self, txt_path, img_size, with_label, is_training):
    # clw note: (1) with_label=True,  is_training=True  -> train
    #           (2) with_label=True,  is_training=False -> test (no aug)
    #           (3) with_label=False, is_training=False -> detect

    # 1. Collect all image paths into a list
    with open(txt_path, 'r') as f:
        self.img_file_paths = [
            x.replace(os.sep, '/') for x in f.read().splitlines()
        ]
    assert len(self.img_file_paths) > 0, 'No images found in %s !' % txt_path

    # 2. Collect all label txt paths into a list
    self.label_file_paths = []
    for img_file_path in self.img_file_paths:
        txt_file_path = img_file_path[:-4] + '.txt'
        assert os.path.isfile(txt_file_path), \
            'No label_file %s found, maybe need to exec xml2txt.py first !' % txt_file_path
        # note: besides .jpg and .png there may also be .JPG or other extensions...
        self.label_file_paths.append(txt_file_path)
    if len(self.label_file_paths) == 0:
        with_label = False

    self.mosaic = False
    if is_training and with_label:
        # clw note: TODO: if using mosaic, remove RandomCrop(), RandomAffine(),
        # LetterBox(img_size) in transforms.py
        self.mosaic = False
    if self.mosaic:
        print('using mosaic !')
        write_to_file('using mosaic !', log_file_path)

    # 3. transforms and data aug; Resize() and ToTensor() are always applied
    self.transforms = build_transforms(img_size, is_training)
    self.img_size = img_size
    self.with_label = with_label
    self.is_training = is_training
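# Note: the slice img_file_path[:-4] above assumes a three-character extension
# and breaks for '.jpeg'; a more robust variant (a suggestion, not the original
# code) derives the label path with os.path.splitext:
import os.path

img_file_path = 'images/sample.jpeg'  # hypothetical path
txt_file_path = os.path.splitext(img_file_path)[0] + '.txt'  # -> 'images/sample.txt'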
def __init__(self, p=0.5):
    self.p = p
    print('using RandomHorizontalFlip !')
    write_to_file('using RandomHorizontalFlip !', log_file_path)
def test(cfg,
         data,
         batch_size,
         img_size,
         conf_thres,
         iou_thres,
         nms_thres,
         src_txt_path,
         weights,
         log_file_path=None,
         model=None):
    # 0. Initialize parameters
    data = parse_data_cfg(data)
    nc = int(data['classes'])  # number of classes
    names = load_classes(data['names'])

    # 1. Load the network
    if model is None:
        device = select_device('0')
        model = Darknet(cfg)
        if weights.endswith('.pt'):  # TODO: .weights format
            model.load_state_dict(
                torch.load(weights, map_location=device)['model'])  # 20200704_50epoch_modify_noobj
            # TODO: map_location=device ?
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)  # clw note: multi-GPU
    else:
        device = next(model.parameters()).device  # get model device
    model.to(device).eval()

    # 2. Load the dataset
    test_dataset = VocDataset(src_txt_path,
                              img_size,
                              with_label=True,
                              is_training=False)
    dataloader = DataLoader(test_dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=8,  # TODO
                            collate_fn=test_dataset.test_collate_fn,  # TODO
                            pin_memory=True)

    # 3. Predict (forward pass)
    image_nums = 0
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP@{}'.format(iou_thres), 'F1')
    #s = ('%20s' + '%10s' * 6) % ('Class', 'ImgNum', 'Target', 'P', 'R', 'mAP@0.5', 'F1')
    p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0.
    jdict, stats, ap, ap_class = [], [], [], []

    pbar = tqdm(dataloader)
    for i, (img_tensor, target_tensor, _, _) in enumerate(pbar):
        img_tensor = img_tensor.to(device)  # (bs, 3, 416, 416)
        target_tensor = target_tensor.to(device)
        height, width = img_tensor.shape[2:]

        start = time.time()
        # Disable gradients
        with torch.no_grad():
            # (1) Run model
            output = model(img_tensor)  # (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
            # (2) NMS
            nms_output = non_max_suppression(output, conf_thres, nms_thres)
            s = 'time use per batch: %.3fs' % (time.time() - start)
        pbar.set_description(s)

        for batch_idx, pred in enumerate(nms_output):  # pred: (bs, 7)
            labels = target_tensor[target_tensor[:, 0] == batch_idx, 1:]
            nl = len(labels)  # number of labels
            tcls = labels[:, 0].tolist() if nl else []  # target class
            image_nums += 1

            # Handle the case of no predicted boxes at all, e.g. conf_thres too high
            if pred is None:
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Clip boxes to image bounds
            # TODO: necessary, since the labels are clipped; removing this clip
            # should lower mAP somewhat
            clip_coords(pred, (height, width))  # mAP is the same

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if nl:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= img_tensor[batch_idx].size()[2]  # w
                tbox[:, [1, 3]] *= img_tensor[batch_idx].size()[1]  # h

                # Search for correct predictions
                for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):
                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break
                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue
                    # Best iou, index between pred and targets
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = bbox_iou(pbox, tbox[m]).max(0)
                    # If iou > threshold and class is correct mark as correct
                    if iou > iou_thres and m[bi] not in detected:  # and pcls == tcls[bi]:
                        correct[i] = 1
                        detected.append(m[bi])

            # print('stats.append: ', (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))
            # Append statistics (correct, conf, pcls, tcls), e.g.:
            #   pred flag: [1, 0, 1, 0, 0, 1, 0, 0, 1]
            #   pred conf: tensor([0.17245, 0.14642, 0.07215, 0.07138, 0.07069,
            #                      0.06449, 0.06222, 0.05580, 0.05452])
            #   pred cls:  tensor([2., 2., 2., 2., 2., 2., 2., 2., 2.])
            #   lb_cls:    [2.0, 2.0, 2.0, 2.0, 2.0]
            stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))

    # Compute statistics, after stats have been gathered for all images
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    # time.sleep(0.01)  # clw note: avoid printing here before tqdm has flushed its output
    #pf = '%20s' + '%10.3g' * 6  # print format
    pf = '%20s' + '%10s' + '%10.3g' * 5
    pf_value = pf % ('all', str(image_nums), nt.sum(), mp, mr, map, mf1)
    print(pf_value)
    if __name__ != '__main__':
        write_to_file(s, log_file_path)
        write_to_file(pf_value, log_file_path)

    results = []
    results.append({"all": (mp, mr, map, mf1)})

    # Print results per class
    #if verbose and nc > 1 and len(stats):
    if nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            #print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))
            print(pf % (names[c], '', nt[c], p[i], r[i], ap[i], f1[i]))
            if __name__ != '__main__':
                write_to_file(
                    pf % (names[c], '', nt[c], p[i], r[i], ap[i], f1[i]),
                    log_file_path)
            results.append({names[c]: (p[i], r[i], ap[i], f1[i])})

    # Return results
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map, mf1), maps
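# A hypothetical standalone invocation of test() (the txt and weights paths are
# placeholders, not values from the original repo; cfg/voc.data and the cfg file
# appear elsewhere in these scripts):
(mp, mr, mean_ap, mf1), maps = test(cfg='cfg/voc_yolov3-spp.cfg',
                                    data='cfg/voc.data',
                                    batch_size=8,
                                    img_size=416,
                                    conf_thres=0.05,
                                    iou_thres=0.5,
                                    nms_thres=0.5,
                                    src_txt_path='valid.txt',
                                    weights='weights/last.pt')
print('mAP@0.5: %.3f' % mean_ap)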
import torch.nn.functional as F

### Hyperparameters
#lr0 = 1e-3  # too large: mAP in the first few epochs is actually worse, and on
             # the kaggle wheat dataset the loss can even overflow
lr0 = 1e-4
momentum = 0.9
weight_decay = 0.0005
###

### Mixed precision training ###
mixed_precision = True
try:  # Mixed precision training https://github.com/NVIDIA/apex
    from apex import amp
except:
    print('Warning: No Apex !!! ')  # https://github.com/NVIDIA/apex
    write_to_file('Warning: No Apex !!! ', log_file_path)
    mixed_precision = False  # not installed

if mixed_precision:
    print('Using Apex !!! ')
    write_to_file('Using Apex !!! ', log_file_path)
######

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    #parser.add_argument('--cfg', type=str, default='cfg/CSPDarknet53-PANet-SPP.cfg', help='xxx.cfg file path')
    #parser.add_argument('--cfg', type=str, default='cfg/resnet18.cfg', help='xxx.cfg file path')
    #parser.add_argument('--cfg', type=str, default='cfg/resnet50.cfg', help='xxx.cfg file path')
    #parser.add_argument('--cfg', type=str, default='cfg/resnet101.cfg', help='xxx.cfg file path')
    #parser.add_argument('--cfg', type=str, default='cfg/voc_yolov3-spp.cfg', help='xxx.cfg file path')
    parser.add_argument('--cfg', type=str,
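# Aside on the mixed-precision block above (not from the original code): recent
# PyTorch ships native mixed precision in torch.cuda.amp, which removes the apex
# dependency. A minimal sketch of the equivalent training step, reusing the
# model / compute_loss names from these scripts:
scaler = torch.cuda.amp.GradScaler()
with torch.cuda.amp.autocast():
    pred = model(img_tensor)
    loss, loss_items = compute_loss(pred, target_tensor, model)
scaler.scale(loss).backward()  # scaled backward pass, as amp.scale_loss did
scaler.step(optimizer)
scaler.update()
optimizer.zero_grad()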
def test_host_uuid_from_uuid_file(self):
    """Test host uuid is from the uuid file, if cernvm not present."""
    new_uuid = str(uuid4())
    write_to_file(self.uuid_fp, new_uuid)

    self.init_hw_info()

    self.assertEqual(new_uuid, self.hardware_info.host_uuid)
def _simulate_container_creation(self, container_name):
    """Simulate the environment, as we cannot run the project inside Docker."""
    container_www_fp, run_fp = self.get_wwwfolder_and_runfp(container_name)
    os.mkdir(container_www_fp)
    write_to_file(run_fp, "")
    return container_www_fp, run_fp
def __init__(self, p=0.5):
    self.p = p
    print('using RandomTranslate !')
    write_to_file('using RandomTranslate !', log_file_path)
from utils.utils import write_to_file, read_from_file
from utils.test_utils import safe_repr

# Set up testing environment and logger at start
config["testing"] = True
test_logfile = os.path.join(os.getcwd(), "dumbq-testing.log")
config["test_logfile"] = test_logfile
logger = ConsoleLogger(config)

now = datetime.datetime.now()
test_headline = "\n".join((
    "*******************************************************",
    "** Starting test suite at " + str(now) + " **",
    "*******************************************************\n",
))
write_to_file(test_logfile, test_headline)


class BaseDumbqTest(unittest.TestCase):
    """Base test class that initializes and destroys DumbQ environment."""

    def setUp(self):
        """Set up and replicate the production environment before testing."""
        self.config = config
        self.feedback = feedback

        # Just declare, lazy initialization
        self.logger = logger
        self.hardware_info = None
        self.dumbq_setup = None
        self.project_hub = None
        self.project_manager = None
def train():
    # 0. Initialize parameters (set random seed, get cfg info, ...)
    cfg = opt.cfg
    weights = opt.weights
    img_size = opt.img_size
    batch_size = opt.batch_size
    total_epochs = opt.epochs
    init_seeds()
    data = parse_data_cfg(opt.data)
    train_txt_path = data['train']
    valid_txt_path = data['valid']
    nc = int(data['classes'])

    # 0. Print config file info, write the log, etc.
    print('config file:', cfg)
    print('pretrained weights:', weights)

    # 1. Load the model
    model = Darknet(cfg).to(device)
    if weights.endswith('.pt'):
        ### model.load_state_dict(torch.load(weights)['model'])
        # Why the line above fails: it does not account for the layer whose class
        # count differs, i.e. the layer right before the yolo_layer. It raises:
        #   size mismatch for module_list.81.Conv2d.weight: copying a param with
        #   shape torch.Size([255, 1024, 1, 1]) from checkpoint, the shape in
        #   current model is torch.Size([75, 1024, 1, 1]).
        # TODO: map_location=device ?
        chkpt = torch.load(weights, map_location=device)
        try:
            chkpt['model'] = {k: v for k, v in chkpt['model'].items()
                              if model.state_dict()[k].numel() == v.numel()}
            model.load_state_dict(chkpt['model'], strict=False)
            # model.load_state_dict(chkpt['model'])
        except KeyError as e:
            s = "%s is not compatible with %s" % (opt.weights, opt.cfg)
            raise KeyError(s) from e
        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file('anchors:\n' + repr(model.module_defs[model.yolo_layers[0]]['anchors']),
                      log_file_path)
    elif weights.endswith('.pth'):  # e.g. 'https://download.pytorch.org/models/resnet50-19c8e357.pth'
        model_state_dict = model.state_dict()
        chkpt = torch.load(weights, map_location=device)
        # Remap checkpoint keys onto the model's keys in groups of five
        # (conv + batchnorm parameters), since the two state dicts order them differently
        state_dict = {}
        block_cnt = 0
        fc_item_num = 2
        chkpt_keys = list(chkpt.keys())
        model_keys = list(model.state_dict().keys())
        model_values = list(model.state_dict().values())
        for i in range(len(chkpt_keys) - fc_item_num):  # 102 - 2
            if i % 5 == 0:
                state_dict[model_keys[i+block_cnt]] = chkpt[chkpt_keys[i]]
            elif i % 5 == 1 or i % 5 == 2:
                state_dict[model_keys[i+block_cnt+2]] = chkpt[chkpt_keys[i]]
            elif i % 5 == 3 or i % 5 == 4:
                state_dict[model_keys[i+block_cnt-2]] = chkpt[chkpt_keys[i]]
                if i % 5 == 4:
                    block_cnt += 1
                    state_dict[model_keys[i + block_cnt]] = model_values[i + block_cnt]
        #chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
        model.load_state_dict(state_dict, strict=False)
        # model.load_state_dict(chkpt['model'])
        # except KeyError as e:
        #     s = "%s is not compatible with %s" % (opt.weights, opt.cfg)
        #     raise KeyError(s) from e
        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file('anchors:\n' + repr(model.module_defs[model.yolo_layers[0]]['anchors']),
                      log_file_path)
    elif len(weights) > 0:
        # darknet format; possible weights are '*.weights', 'yolov3-tiny.conv.15',
        # 'darknet53.conv.74' etc.
        load_darknet_weights(model, weights)
        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file('anchors:\n' + repr(model.module_defs[model.yolo_layers[0]]['anchors']),
                      log_file_path)
    # else:
    #     raise Exception("pretrained model's path can't be NULL!")

    # 2. Set up the optimizer and learning rate
    start_epoch = 0
    #optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=momentum,
    #                            weight_decay=weight_decay, nesterov=True)
    # TODO: nesterov ? weight_decay=0.0005 ?
    # Optimizer
    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in dict(model.named_parameters()).items():
        if '.bias' in k:
            pg2 += [v]  # biases
        elif 'Conv2d.weight' in k:
            pg1 += [v]  # apply weight_decay
        else:
            pg0 += [v]  # parameter group 0
    optimizer = torch.optim.SGD(pg0, lr=lr0, momentum=momentum, nesterov=True)
    optimizer.add_param_group({'params': pg1, 'weight_decay': weight_decay})  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    del pg0, pg1, pg2

    ###### apex need ######
    if mixed_precision:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)

    # Initialize distributed training
    if torch.cuda.device_count() > 1:
        dist.init_process_group(backend='nccl',  # distributed backend
                                init_method='tcp://127.0.0.1:9999',  # distributed training init method
                                world_size=1,  # number of nodes for distributed training
                                rank=0)  # distributed training node rank
        # clw note: multi-GPU; call DistributedDataParallel after amp.initialize(),
        # otherwise it errors out
        model = torch.nn.parallel.DistributedDataParallel(model, find_unused_parameters=True)
        model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level
    ######

    model.nc = nc

    #### Step learning rate schedule
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=[round(total_epochs * x) for x in [0.8, 0.9]],
                                         gamma=0.1)
    ### Cosine learning rate schedule
    #lf = lambda x: (1 + math.cos(x * math.pi / total_epochs)) / 2
    #scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    # 3. Load the dataset
    train_dataset = VocDataset(train_txt_path, img_size, with_label=True)
    dataloader = DataLoader(train_dataset,
                            batch_size=batch_size,
                            shuffle=True,  # TODO: True
                            num_workers=8,  # TODO
                            collate_fn=train_dataset.train_collate_fn,
                            pin_memory=True)

    # 4. Train
    print('')  # blank line
    print('Starting training for %g epochs...' % total_epochs)
    nb = len(dataloader)
    mloss = torch.zeros(4).to(device)  # mean losses
    writer = SummaryWriter()  # tensorboard --logdir=runs, view at http://localhost:6006/
    prebias = start_epoch == 0
    for epoch in range(start_epoch, total_epochs):  # epoch ------------------------------
        # model.train() goes here because test.test() at the end of each epoch
        # calls model.eval()
        model.train()

        # # Prebias
        # if prebias:
        #     if epoch < 3:  # prebias
        #         ps = 0.1, 0.9  # prebias settings (lr=0.1, momentum=0.9)
        #     else:  # normal training
        #         ps = lr0, momentum  # normal training settings
        #         print_model_biases(model)
        #         prebias = False
        #
        #     # Bias optimizer settings
        #     optimizer.param_groups[2]['lr'] = ps[0]
        #     if optimizer.param_groups[2].get('momentum') is not None:  # for SGD but not Adam
        #         optimizer.param_groups[2]['momentum'] = ps[1]

        start = time.time()
        title = ('\n' + '%10s' * 11) % ('Epoch', 'Batch', 'gpu_mem', 'GIoU', 'obj', 'cls',
                                        'total', 'targets', 'img_size', 'lr', 'time_use')
        print(title)
        #pbar = tqdm(dataloader, ncols=20)  # tune ncols yourself: as large as
        # possible without causing wrapping, while still looking tidy
        #for i, (img_tensor, target_tensor, img_path, _) in enumerate(pbar):

        # # Freeze darknet53.conv.74 for first epoch
        # freeze_backbone = False
        # if freeze_backbone and (epoch < 3):
        #     for i, (name, p) in enumerate(model.named_parameters()):
        #         if int(name.split('.')[2]) < 75:  # if layer < 75  # [2] for multi-GPU, [1] for single
        #             p.requires_grad = False if (epoch < 3) else True

        for i, (img_tensor, target_tensor, img_path, _) in enumerate(dataloader):
            # # SGD burn-in
            # ni = epoch * nb + i
            # if ni <= 1000:  # n_burnin = 1000
            #     lr = lr0 * (ni / 1000) ** 2
            #     for g in optimizer.param_groups:
            #         g['lr'] = lr

            batch_start = time.time()
            #print(img_path)
            img_tensor = img_tensor.to(device)
            target_tensor = target_tensor.to(device)

            ### The training step consists of the following:
            # (1) forward pass
            #print('img_tensor:', img_tensor[0][1][208][208])
            pred = model(img_tensor)

            # (2) compute the loss
            loss, loss_items = compute_loss(pred, target_tensor, model)
            if not torch.isfinite(loss):
                raise Exception('WARNING: non-finite loss, ending training ', loss_items)

            # (3) backpropagate the loss to obtain the gradients
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # (4) optimizer: update the parameters, then zero the gradients
            # ni = i + nb * epoch  # number integrated batches (since train start)
            # if ni % accumulate == 0:  # Accumulate gradient for x batches before optimizing
            optimizer.step()
            optimizer.zero_grad()

            # Print batch results
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0  # (GB)
            #s = ('%10s' * 2 + '%10.3g' * 7 + '%10.3gs') % ('%g/%g' % (epoch, total_epochs - 1), '%.3gG' % mem, *mloss, len(target_tensor), img_size, scheduler.get_lr()[0], time.time()-batch_start)
            #s = ('%10s' * 3 + '%10.3g' * 7 + '%10.3gs') % ('%g/%g' % (epoch, total_epochs - 1), '%g/%g' % (i, nb - 1), '%.3gG' % mem, *mloss, len(target_tensor), img_size, optimizer.state_dict()['param_groups'][0]['lr'], time.time()-batch_start)
            s = ('%10s' * 3 + '%10.3g' * 7 + '%10.3gs') % ('%g/%g' % (epoch, total_epochs - 1),
                                                           '%g/%g' % (i, nb - 1),
                                                           '%.3gG' % mem,
                                                           *mloss,
                                                           len(target_tensor),
                                                           img_size,
                                                           scheduler.get_lr()[0],
                                                           time.time() - batch_start)
            if i % 10 == 0:
                print(s)

            # Plot
            if epoch == start_epoch and i == 0:
                fname = 'train_batch.jpg'  # filename
                cur_path = os.getcwd()
                res = plot_images(images=img_tensor, targets=target_tensor,
                                  paths=img_path, fname=os.path.join(cur_path, fname))
                writer.add_image(fname, res, dataformats='HWC', global_step=epoch)
                # tb_writer.add_graph(model, imgs)  # add model to tensorboard
            # end batch ------------------------------------------------------------------------------------------------

        print('time use per epoch: %.3fs' % (time.time() - start))
        write_to_file(title, log_file_path)
        write_to_file(s, log_file_path)

        # Update scheduler
        scheduler.step()

        # compute mAP
        results, maps = test.test(cfg,
                                  'cfg/voc.data',
                                  batch_size=batch_size,
                                  img_size=img_size,
                                  conf_thres=0.05,
                                  iou_thres=0.5,
                                  nms_thres=0.5,
                                  src_txt_path=valid_txt_path,
                                  dst_path='./output',
                                  weights=None,
                                  model=model,
                                  log_file_path=log_file_path)

        # Tensorboard
        tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss',
                'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/F1']
        for x, tag in zip(list(mloss[:-1]) + list(results), tags):
            writer.add_scalar(tag, x, epoch)

        # Save the model checkpoint
        chkpt = {'epoch': epoch,
                 'model': model.module.state_dict()
                          if type(model) is nn.parallel.DistributedDataParallel
                          else model.state_dict(),  # clw note: multi-GPU
                 'optimizer': optimizer.state_dict()}
        torch.save(chkpt, last_model_path)

    print('end')
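# The checkpoint saved above stores 'epoch', 'model' and 'optimizer'. A minimal
# resume sketch (an illustration, not part of the original script; last_model_path
# and device as defined above):
chkpt = torch.load(last_model_path, map_location=device)
model.load_state_dict(chkpt['model'])
optimizer.load_state_dict(chkpt['optimizer'])
start_epoch = chkpt['epoch'] + 1  # continue from the next epoch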