def log_request(record): global hpclient req = json.dumps(record) LOGGER.info(req) if hpclient and record['is_shellshock']: hpclient.publish(app.config['hpfeeds.channel'], req)
def get_custom_field(self, field_key):
    result = ''
    try:
        result = self.custom_fields[field_key]
    except KeyError:
        LOGGER.warning('Issue %s does not have a value for field %s' % (self.key, field_key))
    return result
def end_suite(self, suite): self._separator('SUITE') self._end('SUITE', suite.longname, suite.elapsedtime) self._separator('SUITE') if self._indent == 0: LOGGER.output_file('Debug', self._file.name) self.close()
def end_suite(self, suite): LOGGER.end_suite(suite) if self._xmllogger.ended_output: LOGGER.output_file('Output', self._xmllogger.ended_output) orig_outpath = self._settings['Output'] suite.context.output_file_changed(orig_outpath) self._create_split_log(self._xmllogger.ended_output, suite)
def update_issue_status(status_list): for item in status_list: status = item['status'].upper() key = item['key'] update_query = DbJiraIssues.update(dc_status=status).where(DbJiraIssues.key == key) update_query.execute() LOGGER.debug(update_query)
def rename(self, path, path1):
    # Rename is handled by copying and deleting files...
    LOGGER.debug("rename %s %s" % (path, path1))
    d = self.get_dir(path)
    if self.is_valid_file(path) and d.is_file(path):
        if not self.is_valid_file(path1):
            # from a valid file to an editor file
            buf = self.get_file_buf(path1)
            buf.write(d.read_file(path))
            # TODO: remove path?
        else:
            # from a valid file to a valid file
            # if rename is defined
            # TODO: with unlink method defined in fs
            pass
    elif not self.is_valid_file(path):
        if self.is_valid_file(path1) and d.is_file(path1):
            # from an editor file to a valid file
            buf = self.get_file_buf(path)
            ret = d.write_to(path1, buf.getvalue())
            self.open_mode = None
            self.remove_file_buf(path)
            if ret is False:
                return -errno.EIO
        else:
            # from an editor file to an editor file
            # TODO
            pass
def checkTrame(self):
    if self.trameUsed:
        LOGGER.debug("Frame received: {}".format(self.trameUsed.lessRawView()))
        if "A55A" not in self.trameUsed.sep:
            LOGGER.warning("Wrong separator, rejected")
            self.trameUsed = ''
            return
        if self.doChecksum(self.trameUsed) not in self.trameUsed.checkSum:
            # bad checksum
            LOGGER.warning("Wrong checksum, expected: {}, rejected".format(
                self.doChecksum(self.trameUsed)))
            self.trameUsed = ''
            return
        with self.lock:
            if self.trameUsed.ident in self.identSet:
                # fetch the sensor from the database
                sensorUsed = sensor.Sensor.objects(physic_id=self.trameUsed.ident)[0]
                newData = ''  # the new value to store; its type depends on the sensor
                if sensorUsed.__class__.__name__ in ("Switch", "Temperature", "Position"):
                    newData = sensorUsed.translateTrame(self.trameUsed)
                else:
                    LOGGER.warning("Other sensor type (not handled YET!)")
                # write the new value to the database
                if newData:
                    sensorUsed.update(newData)
                    LOGGER.info("Sensor {} || New data {}".format(
                        sensorUsed.physic_id, sensorUsed.current_state))
        self.trameUsed = ''
def log_request(record): global hpclient req = json.dumps(record) LOGGER.info(req) if hpclient and (record['is_shellshock'] or app.config['hpfeeds.only_exploits'].lower() == 'false'): hpclient.publish(app.config['hpfeeds.channel'], req)
def unlink(self, path):
    LOGGER.debug("FSdir unlink %s" % path)
    file_name = self.get_article_file_name(path)
    if file_name in self.files:
        self.files.pop(file_name)
        return True  # succeeded
    else:
        return False
def format_date(date_string):
    result = date_string
    try:
        time_struct = strptime(date_string, "%d/%b/%y")
        result = date.fromtimestamp(mktime(time_struct))
    except (TypeError, ValueError):
        LOGGER.warning('[%s] is not a valid date' % date_string)
    return result
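# A minimal usage sketch for format_date, assuming `from time import strptime, mktime`
# and `from datetime import date` are in scope (the function body implies both).
print(format_date('07/Mar/21'))   # -> 2021-03-07 (a datetime.date)
print(format_date('not a date'))  # -> 'not a date', with a warning logged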
def translateTrame(self,inTrame): """ return the temperature (range 0-40 c) from data byte 2 """ rowTemp=int(inTrame.data1,16) temperature = round((rowTemp*40/255.0),3) LOGGER.info("Temperature sensor {} with temp {}".format(inTrame.ident, temperature)) return temperature
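# Worked example of the mapping above: the raw data byte spans 0..255 and is
# scaled linearly onto 0..40 degrees C, i.e. temperature = raw * 40 / 255.
raw = 0x80                          # 128, mid-scale
print(round(raw * 40 / 255.0, 3))   # 20.078
print(round(0xFF * 40 / 255.0, 3))  # 40.0, full scale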
def utime(self, path, times):
    LOGGER.debug("utime %s %s" % (path, times))
    d = self.get_dir(path)
    if not hasattr(d, "utime"):
        return -errno.ENOSYS  # Not implemented
    else:
        return d.utime(path, times)
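# Note on the capability check above: `not hasattr(d, "utime")` is the idiomatic
# spelling of the original `dir(d).count("utime") == 0`; both ask whether the
# backend directory object implements the optional operation before dispatching.
class _NoUtime(object):
    pass

print(hasattr(_NoUtime(), "utime"))  # False -> the handler returns -errno.ENOSYS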
def sendTrame(self, ident, newState):
    with self.lock:
        sensorUsed = sensor.Device.objects(physic_id=ident)[0]
        daTrame = sensorUsed.gimmeTrame(newState)
        if daTrame:
            self.soc.send(daTrame)
            LOGGER.info("Frame sent: {}".format(daTrame))
        return
def updateOne(self, ident):
    """ Ask to update the sensor with this id """
    LOGGER.info("lazily updating {}".format(ident))
    self.idToUpdate = ident
    self.newState = ''
    self.save()
def receive(self):
    # LOGGER.debug("waiting for a frame")
    message = self.soc.recv(1024)
    if message and len(message) == 28:
        LOGGER.debug("frame received: {}".format(message))
        self.trameUsed = Trame.trame(message)
    else:
        return
def sendTrame(self, ident, newState):
    """ Ask the traductor to send a frame with the new state of a sensor """
    LOGGER.info("Lazily updating {} with {}".format(ident, newState))
    self.idToUpdate = ident
    self.newState = newState
    self.save()
def web_request(program, url): LOGGER.info('Performing {} request on {}'.format(program, url)) data = '' try: resp = requests.get(url, headers={'User-Agent': USER_AGENTS[program]}) data = resp.text except Exception as e: LOGGER.error(e) return '{} {}'.format(program, url), data
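# Hedged usage sketch for web_request. USER_AGENTS is assumed to be a module-level
# dict mapping a program name to its User-Agent string (the lookup above implies it);
# the entry below is hypothetical.
USER_AGENTS = {'curl': 'curl/7.68.0'}
title, body = web_request('curl', 'https://example.com')
print(title)  # 'curl https://example.com'; body is '' if the request failed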
def open(self, path, flags):
    LOGGER.debug("open %s %d" % (path, flags))
    if path not in self.files:
        if self.is_valid_file(path):
            buf = self.get_file_buf(path)
            d = self.get_dir(path)
            txt = d.read_file(path)
            buf.write(txt)
def _import_listener(self, name, args): listener, source = utils.import_(name, 'listener') if not inspect.ismodule(listener): listener = listener(*args) elif args: raise DataError("Listeners implemented as modules do not take arguments") LOGGER.info("Imported listener '%s' with arguments %s (source %s)" % (name, utils.seq2str2(args), source)) return listener
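# Sketch of the two listener shapes _import_listener accepts, following the Robot
# Framework convention its error message describes (file contents are illustrative):
#
#   # mylistener.py -- module listener: used as-is, so args must be empty
#   def start_suite(name, attrs): ...
#
#   # MyListener.py -- class listener: instantiated as listener(*args)
#   class MyListener:
#       def __init__(self, level): ...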
def read(self, path, size, offset): LOGGER.debug("read %s %d %d" % (path, size, offset)) self.open_mode = self.READ buf = self.get_file_buf(path) buf.seek(offset) return buf.read(size)
def retro_browse_points(request):
    selected_team_name = 'allteams'
    selected_sprint_number = 'allsprints'
    selected_sticker_type = 'alltypes'
    count = 15
    teams = Team.objects.all().order_by('name')
    sprints = Sprint.objects.all().order_by('-number')
    types = BoardSticker.TYPE_CHOICES
    stickers = BoardSticker.objects.all().order_by('-creation_date')
    try:
        selected_team_name = request.GET["team"]
        selected_sprint_number = request.GET["sprint"]
        selected_sticker_type = request.GET["type"]
        count = int(request.GET["count"])
    except (KeyError, ValueError):
        LOGGER.warning('Request with incorrect parameters. Using defaults.')
    if selected_team_name != 'allteams':
        selected_team = Team.objects.get(name=selected_team_name)
        board = RetroBoard.objects.filter(team=selected_team)
        stickers = stickers.filter(retroBoard__in=board)
    if selected_sprint_number != 'allsprints':
        selected_sprint = Sprint.objects.get(number=selected_sprint_number)
        boards = RetroBoard.objects.filter(sprint=selected_sprint)
        stickers = stickers.filter(retroBoard__in=boards)
    if selected_sticker_type != 'alltypes':
        stickers = stickers.filter(type=selected_sticker_type)
    # slicing requires an int; count was parsed above
    stickers = stickers[:count]
    types_dict = {}
    for item in BoardSticker.TYPE_CHOICES:
        types_dict[item[0]] = item[1]
    for point in stickers:
        point.type_str = types_dict[point.type]
    if selected_sprint_number != 'allsprints':
        selected_sprint_number = int(selected_sprint_number)
    return render_to_response(
        'retro/dpq_retro_action_points.html',
        RequestContext(request, {'stickers': stickers,
                                 'teams': teams,
                                 'sprints': sprints,
                                 'types': types,
                                 'selected_team': selected_team_name,
                                 'selected_sprint': selected_sprint_number,
                                 'selected_type': selected_sticker_type,
                                 'count': count,
                                 'count_options': [15, 30, 45, 60],
                                 'active_branches': get_active_branches()}))
def call_method(self, method, *args): if self.is_java: args = [self._to_map(a) if isinstance(a, dict) else a for a in args] try: method(*args) except: message, details = utils.get_error_details() LOGGER.error("Calling listener method '%s' of listener '%s' failed: %s" % (method.__name__, self.name, message)) LOGGER.info("Details:\n%s" % details)
def get_page(self): if self.page is None: LOGGER.info('Fetching page contents from Confluence') data = self.server.getPage( self.get_token(), self.settings.namespace, self.settings.pagename ) self.page = ConfluencePage(data) return self.page
def write(self, path, txt, offset): LOGGER.debug("write %s [...] %d" % (path, offset)) self.open_mode = self.WRITE buf = self.get_file_buf(path) buf.seek(offset) buf.write(txt) return len(txt)
def mkdir(self, path, mode):
    LOGGER.debug("mkdir %s %x" % (path, mode))
    d = self.get_dir(path)
    if not hasattr(d, "mkdir"):
        return -errno.EACCES  # Permission denied
    else:
        res = d.mkdir(path)
        if res is not True:
            return -errno.EACCES  # Permission denied
def rmdir(self, path):
    LOGGER.debug("rmdir %s" % path)
    d = self.get_dir(path)
    if not hasattr(d, "rmdir"):
        return -errno.EACCES  # Permission denied
    else:
        res = d.rmdir(path)
        if res is not True:
            return -errno.EACCES  # Permission denied
def _email_config(self):
    try:
        self.mail_server = smtplib.SMTP('smtp.gmail.com', 587)
        self.mail_server.ehlo()
        self.mail_server.starttls()
        self.mail_server.login(self.gmail_user, self.gmail_password)
    except Exception as e:
        LOGGER.error("Failed to connect. Error: {}".format(e))
        exit()
def close(self, suite): stats = Statistics(suite, self._settings['SuiteStatLevel'], self._settings['TagStatInclude'], self._settings['TagStatExclude'], self._settings['TagStatCombine'], self._settings['TagDoc'], self._settings['TagStatLink']) stats.serialize(self._xmllogger) self._xmllogger.close(serialize_errors=True) LOGGER.unregister_logger(self._xmllogger) LOGGER.output_file('Output', self._settings['Output'])
def get_issues(self, issues, limit=300): result = [] keys = ','.join(issues) request = 'project=%s AND key in (%s)' % (self.settings.project, keys) LOGGER.debug(request) response = self.proxy.getIssuesFromJqlSearch(self.get_token(), request, Types.intType(limit)) for item in response: issue = JiraIssue() issue.parse_raw(item) result.append(issue) return result
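# Hedged sketch of the JQL string get_issues builds before calling the SOAP proxy;
# the project key and issue ids below are hypothetical.
project = 'DC'
issues = ['DC-1', 'DC-2']
request = 'project=%s AND key in (%s)' % (project, ','.join(issues))
print(request)  # project=DC AND key in (DC-1,DC-2)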
def unlink(self, path):
    LOGGER.debug("unlink %s" % path)
    d = self.get_dir(path)
    self.remove_file_buf(path)
    if self.is_valid_file(path):
        if not hasattr(d, "unlink"):
            return -errno.EACCES  # Permission denied
        else:
            res = d.unlink(path)
            if res is not True:
                return -errno.EACCES  # Permission denied
async def dashboard(hostname, sar_params, time_range, nested_elem):
    config.read(CFG_PATH)
    api_endpoint = config.get('Grafana', 'api_url')
    payload = {
        "ts_beg": time_range['grafana_range_begin'],
        "ts_end": time_range['grafana_range_end'],
        "nodename": hostname,
        "modes": sar_params,
        "nested_elem": nested_elem
    }
    LOGGER.debug(api_endpoint)
    LOGGER.debug(payload)
    try:
        res = requests.post(api_endpoint, json=payload)
        if res.status_code == 200:
            LOGGER.debug("status code: %s" % res.status_code)
            LOGGER.debug("content: \n%s" % res.content)
            LOGGER.debug("Dashboard created for -- %s" % hostname)
        else:
            LOGGER.warning("status code: %s" % res.status_code)
            LOGGER.warning("content: \n%s" % res.content)
        slug = json.loads(res.text)['slug']
        LOGGER.debug(json.loads(res.text))
        LOGGER.debug(slug)
    except ConnectionError:
        LOGGER.error("endpoint not active. Couldn't connect.")
        slug = None
    except Exception as e:
        LOGGER.error(str(e))
        LOGGER.error("unknown error. Couldn't trigger request.")
        slug = None
    return slug
response = weather_api.read() response_dictionary = json.loads(response) forecast_api = urllib2.urlopen(request_2) response_2 = forecast_api.read() response_2_dictionary = json.loads(response_2) except Exception: wtr = 'Failed to connect to Open Weather Map. ' try: current = response_dictionary['main']['temp'] current_low = response_dictionary['main']['temp_min'] current_high = response_dictionary['main']['temp_max'] conditions = response_dictionary['weather'][0]['description'] except KeyError: LOGGER.error('Unable to read links') raise RuntimeError('Unable to read links') current = str(round(current, 1)).replace('.', ' point ') current_low = str(round(current_low, 1)).replace('.', ' point ') current_high = str(round(current_high, 1)).replace('.', ' point ') todays_low = response_2_dictionary['list'][0]['main']['temp_min'] todays_high = response_2_dictionary['list'][0]['main']['temp_max'] todays_low_str = str(round(todays_low, 1)).replace('.', ' point ') todays_high_str = str(round(todays_high, 1)).replace('.', ' point ') LOGGER.info('Max:, {}, Min:, {}'.format(todays_high, todays_low)) wtr = ('Weather conditions for today, ' + conditions + ' with a current temperature of ' + current) frc = (', a low of ' + todays_low_str + ' and a high of ' + todays_high_str +
def main(seed): with timer('load data'): df = pd.read_csv(FOLD_PATH) y1 = (df.EncodedPixels_1 != "-1").astype("float32").values.reshape( -1, 1) y2 = (df.EncodedPixels_2 != "-1").astype("float32").values.reshape( -1, 1) y3 = (df.EncodedPixels_3 != "-1").astype("float32").values.reshape( -1, 1) y4 = (df.EncodedPixels_4 != "-1").astype("float32").values.reshape( -1, 1) y = np.concatenate([y1, y2, y3, y4], axis=1) with timer('preprocessing'): train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID] y_train, y_val = y[df.fold_id != FOLD_ID], y[df.fold_id == FOLD_ID] train_augmentation = Compose([ Flip(p=0.5), OneOf([ GridDistortion(p=0.5), OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5) ], p=0.5), OneOf([ RandomGamma(gamma_limit=(100, 140), p=0.5), RandomBrightnessContrast(p=0.5), RandomBrightness(p=0.5), RandomContrast(p=0.5) ], p=0.5), OneOf([ GaussNoise(p=0.5), Cutout(num_holes=10, max_h_size=10, max_w_size=20, p=0.5) ], p=0.5), ShiftScaleRotate(rotate_limit=20, p=0.5), ]) val_augmentation = None train_dataset = SeverDataset(train_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS, transforms=train_augmentation, crop_rate=1.0, class_y=y_train) val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS, transforms=val_augmentation) train_sampler = MaskProbSampler(train_df, demand_non_empty_proba=0.6) train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=8) val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8) del train_df, val_df, df, train_dataset, val_dataset gc.collect() with timer('create model'): model = smp.Unet('resnet34', encoder_weights="imagenet", classes=N_CLASSES, encoder_se_module=True, decoder_semodule=True, h_columns=False, skip=True, act="swish", freeze_bn=True, classification=CLASSIFICATION, attention_type="cbam") model = convert_model(model) if base_model is not None: model.load_state_dict(torch.load(base_model)) model.to(device) criterion = torch.nn.BCEWithLogitsLoss() optimizer = torch.optim.Adam([ { 'params': model.decoder.parameters(), 'lr': 3e-3 }, { 'params': model.encoder.parameters(), 'lr': 3e-4 }, ]) if base_model is None: scheduler_cosine = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5) scheduler = GradualWarmupScheduler( optimizer, multiplier=1.1, total_epoch=CLR_CYCLE * 2, after_scheduler=scheduler_cosine) else: scheduler = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5) model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0) if EMA: ema_model = copy.deepcopy(model) if base_model_ema is not None: ema_model.load_state_dict(torch.load(base_model_ema)) ema_model.to(device) else: ema_model = None model = torch.nn.DataParallel(model) ema_model = torch.nn.DataParallel(ema_model) with timer('train'): train_losses = [] valid_losses = [] best_model_loss = 999 best_model_ema_loss = 999 best_model_ep = 0 ema_decay = 0 checkpoint = base_ckpt + 1 for epoch in range(102, EPOCHS + 1): seed = seed + epoch seed_torch(seed) if epoch >= EMA_START: ema_decay = 0.99 LOGGER.info("Starting {} epoch...".format(epoch)) tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device, cutmix_prob=0.0, classification=CLASSIFICATION, ema_model=ema_model, ema_decay=ema_decay) train_losses.append(tr_loss) LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5))) valid_loss = validate(model, val_loader, criterion, device, classification=CLASSIFICATION) valid_losses.append(valid_loss) 
LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5))) if EMA and epoch >= EMA_START: ema_valid_loss = validate(ema_model, val_loader, criterion, device, classification=CLASSIFICATION) LOGGER.info('Mean EMA valid loss: {}'.format( round(ema_valid_loss, 5))) if ema_valid_loss < best_model_ema_loss: torch.save( ema_model.module.state_dict(), 'models/{}_fold{}_ckpt{}_ema.pth'.format( EXP_ID, FOLD_ID, checkpoint)) best_model_ema_loss = ema_valid_loss scheduler.step() if valid_loss < best_model_loss: torch.save( model.module.state_dict(), 'models/{}_fold{}_ckpt{}.pth'.format( EXP_ID, FOLD_ID, checkpoint)) best_model_loss = valid_loss best_model_ep = epoch #np.save("val_pred.npy", val_pred) if epoch % (CLR_CYCLE * 2) == CLR_CYCLE * 2 - 1: torch.save( model.module.state_dict(), 'models/{}_fold{}_latest.pth'.format(EXP_ID, FOLD_ID)) LOGGER.info('Best valid loss: {} on epoch={}'.format( round(best_model_loss, 5), best_model_ep)) if EMA: torch.save( ema_model.module.state_dict(), 'models/{}_fold{}_latest_ema.pth'.format( EXP_ID, FOLD_ID)) LOGGER.info('Best ema valid loss: {}'.format( round(best_model_ema_loss, 5))) checkpoint += 1 best_model_loss = 999 #del val_pred gc.collect() LOGGER.info('Best valid loss: {} on epoch={}'.format( round(best_model_loss, 5), best_model_ep)) xs = list(range(1, len(train_losses) + 1)) plt.plot(xs, train_losses, label='Train loss') plt.plot(xs, valid_losses, label='Val loss') plt.legend() plt.xticks(xs) plt.xlabel('Epochs') plt.savefig("loss.png")
BATCH_SIZE = 32 EPOCHS = 125 FOLD_ID = 1 EXP_ID = "exp55_unet_resnet" CLASSIFICATION = True EMA = True EMA_START = 6 base_ckpt = 17 base_model = None base_model_ema = None base_model = "models/{}_fold{}_latest.pth".format(EXP_ID, FOLD_ID) base_model_ema = "models/{}_fold{}_latest_ema.pth".format(EXP_ID, FOLD_ID) setup_logger(out_file=LOGGER_PATH) seed_torch(SEED) LOGGER.info("seed={}".format(SEED)) @contextmanager def timer(name): t0 = time.time() yield LOGGER.info('[{}] done in {} s'.format(name, round(time.time() - t0, 2))) def main(seed): with timer('load data'): df = pd.read_csv(FOLD_PATH) y1 = (df.EncodedPixels_1 != "-1").astype("float32").values.reshape( -1, 1) y2 = (df.EncodedPixels_2 != "-1").astype("float32").values.reshape(
def main(): with timer('load data'): df = pd.read_csv(TRAIN_PATH) df = df[df.Image != "ID_6431af929"].reset_index(drop=True) df.loc[df.pre_SOPInstanceUID == "ID_6431af929", "pre1_SOPInstanceUID"] = df.loc[df.pre_SOPInstanceUID == "ID_6431af929", "Image"] df.loc[df.post_SOPInstanceUID == "ID_6431af929", "post1_SOPInstanceUID"] = df.loc[df.post_SOPInstanceUID == "ID_6431af929", "Image"] df.loc[df.prepre_SOPInstanceUID == "ID_6431af929", "pre2_SOPInstanceUID"] = df.loc[df.prepre_SOPInstanceUID == "ID_6431af929", "pre1_SOPInstanceUID"] df.loc[df.postpost_SOPInstanceUID == "ID_6431af929", "post2_SOPInstanceUID"] = df.loc[df.postpost_SOPInstanceUID == "ID_6431af929", "post1_SOPInstanceUID"] y = df[TARGET_COLUMNS].values df = df[[ "Image", "pre1_SOPInstanceUID", "post1_SOPInstanceUID", "pre2_SOPInstanceUID", "post2_SOPInstanceUID" ]] gc.collect() with timer('preprocessing'): train_augmentation = Compose([ CenterCrop(512 - 50, 512 - 50, p=1.0), HorizontalFlip(p=0.5), OneOf([ ElasticTransform(p=0.5, alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03), GridDistortion(p=0.5), OpticalDistortion(p=1, distort_limit=2, shift_limit=0.5) ], p=0.5), Rotate(limit=30, border_mode=0, p=0.7), Resize(img_size, img_size, p=1) ]) train_dataset = RSNADataset(df, y, img_size, IMAGE_PATH, id_colname=ID_COLUMNS, transforms=train_augmentation, black_crop=False, three_window=True, rescaling=False, pick_type="post_post") train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True) del df, train_dataset gc.collect() with timer('create model'): model = CnnModel(num_classes=N_CLASSES, encoder="se_resnext50_32x4d", pretrained="imagenet", pool_type="avg") if model_path is not None: model.load_state_dict(torch.load(model_path)) model.to(device) criterion = torch.nn.BCEWithLogitsLoss( weight=torch.FloatTensor([2, 1, 1, 1, 1, 1]).cuda()) optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, eps=1e-4) model = torch.nn.DataParallel(model) with timer('train'): for epoch in range(1, epochs + 1): if epoch == 5: for param_group in optimizer.param_groups: param_group['lr'] = param_group['lr'] * 0.1 seed_torch(SEED + epoch) LOGGER.info("Starting {} epoch...".format(epoch)) tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device) LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5))) torch.save(model.module.state_dict(), 'models/{}_ep{}.pth'.format(EXP_ID, epoch))
def setup_queue(self, queue_name): LOGGER.info('Declaring queue %s', queue_name) cb = functools.partial(self.on_queue_declareok, userdata=queue_name) self._channel.queue_declare(queue=queue_name, callback=cb)
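# Hedged sketch of the callback side of setup_queue: functools.partial pins the
# queue name as `userdata`, so when pika fires the callback with only the declare
# frame, the handler still knows which queue it refers to. The body below is an
# assumption modeled on pika's asynchronous-consumer example.
def on_queue_declareok(self, _unused_frame, userdata):
    LOGGER.info('Queue %s declared', userdata)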
def start(self):
    if len(self.players) != Othello.player_num:
        LOGGER.error(u"invalid player num:{}, expected:{}".format(
            len(self.players), Othello.player_num))
        return
    LOGGER.info(u"Othello game started!")
    LOGGER.info(u"choosing player...")
    offset = random.randint(0, Othello.player_num - 1)
    for idx, p in enumerate(self.piece_pool):
        tmp_player = self.players[(idx + offset) % len(self.players)]
        # self.piece_dict[p] = tmp_player
        tmp_player.set_piece(p)
    self.round = 0
    pass_time = 0
    LOGGER.info(u"start moving...")
    last_player = None
    while True:
        if pass_time == len(self.players):
            LOGGER.info(u"all players have no way to go, game is finished!")
            break
        cur_player = self.players[(self.round + offset) % len(self.players)]
        cur_piece = cur_player.piece
        if last_player:
            last_player.notify_status(self.board, cur_piece)
        self.round += 1
        LOGGER.debug(u"current board")
        LOGGER.debug(board2str(self.board))
        valid_points = get_valid_points(self.board, cur_piece)
        LOGGER.debug(u"valid points:{}".format(valid_points))
        LOGGER.info(u"round{} player[{}] putting {}".format(
            self.round, cur_player, cur_player.piece))
        if not valid_points:
            valid_points.append((None, None))
        action = cur_player.play(valid_points, self.board)
        last_player = cur_player
        if action not in valid_points:
            LOGGER.error("invalid action:{}, pass this round".format(action))
            pass_time += 1
            continue
        if action == (None, None):
            LOGGER.info(u"player[{}] has no way to go".format(cur_player))
            pass_time += 1
            continue
        LOGGER.info(u"player[{}] put {} to point {}".format(
            cur_player, cur_piece, action))
        self.put_piece(action[0], action[1], cur_piece)
        pass_time = 0
    score = get_score(self.board)
    if score > 0:
        win_piece = BLACK
    elif score < 0:
        win_piece = WHITE
    else:
        win_piece = None
        LOGGER.info(u"THIS IS A DRAW GAME!")
    winner = None
    if win_piece:
        for player in self.players:
            LOGGER.debug(u"notifying player[{}] of reward".format(player))
            player.notify_win(win_piece)
            if player.piece == win_piece:
                LOGGER.info(u"piece:{}[player:{}] WIN! WITH SCORE={}".format(
                    win_piece, player, abs(score)))
                winner = player
    LOGGER.info(u"game finished!")
    return winner
def timer(name): t0 = time.time() yield LOGGER.info(f'[{name}] done in {time.time() - t0:.0f} s')
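# Usage sketch for the timer helper above. The `yield` implies it is wrapped with
# contextlib.contextmanager at its definition site; that decorator is outside this
# excerpt, so the standalone demo below carries it explicitly.
import time
from contextlib import contextmanager

@contextmanager
def timer_demo(name):
    t0 = time.time()
    yield
    print(f'[{name}] done in {time.time() - t0:.0f} s')

with timer_demo('sleep'):
    time.sleep(1)  # prints: [sleep] done in 1 s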
def main(seed): with timer('load data'): df = pd.read_csv(FOLD_PATH) if N_CLASSES == 3: df.drop("EncodedPixels_2", axis=1, inplace=True) df = df.rename(columns={"EncodedPixels_3": "EncodedPixels_2"}) df = df.rename(columns={"EncodedPixels_4": "EncodedPixels_3"}) with timer('preprocessing'): val_df = df[df.fold_id == FOLD_ID] val_augmentation = None val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS, transforms=val_augmentation) val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8) del val_df, df, val_dataset gc.collect() with timer('create model'): model = smp_old.Unet('resnet34', encoder_weights="imagenet", classes=N_CLASSES, encoder_se_module=True, decoder_semodule=True, h_columns=False, skip=True, act="swish", freeze_bn=True, classification=CLASSIFICATION) model.load_state_dict(torch.load(base_model)) model.to(device) model.eval() criterion = torch.nn.BCEWithLogitsLoss() with timer('predict'): valid_loss, y_pred, y_true, cls = predict(model, val_loader, criterion, device, classification=CLASSIFICATION) LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5))) scores = [] all_scores = [] min_sizes = [300, 0, 600, 1600] for i in range(N_CLASSES): if i == 1: continue best = 0 count = 0 min_size = min_sizes[i] for th in [0.7+i*0.01 for i in range(30)]: val_preds_ = copy.deepcopy(y_pred[:, i, :, :]) scores_ = [] all_scores_ = [] for y_val_, y_pred_ in zip(y_true[:, i, :, :], val_preds_): y_pred_ = post_process(y_pred_ > 0.5, y_pred_, min_size, th) score = dice(y_val_, y_pred_) if np.isnan(score): scores_.append(1) else: scores_.append(score) LOGGER.info('dice={} on {}'.format(np.mean(scores_), th)) if np.mean(scores_) >= best: best = np.mean(scores_) count = 0 else: count += 1 if count == 3: break scores.append(best) all_scores.append(all_scores_) LOGGER.info('holdout dice={}'.format(np.mean(scores)))
def main(): with timer('load data'): df = pd.read_csv(TRAIN_PATH) df["loc_x"] = df["loc_x"] / 100 df["loc_y"] = df["loc_y"] / 100 y = df[TARGET_COLUMNS].values df = df[[ID_COLUMNS]] gc.collect() with timer("split data"): if y.shape[1] == 1: folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=0).split(df, y) else: folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=0).split(df, y[:, 0]) for n_fold, (train_index, val_index) in enumerate(folds): train_df = df.loc[train_index] val_df = df.loc[val_index] y_train = y[train_index] y_val = y[val_index] if n_fold == fold_id: break with timer('preprocessing'): train_augmentation = Compose([ Flip(p=0.5), OneOf([ ElasticTransform(p=0.5, alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03), GridDistortion(p=0.5), OpticalDistortion(p=1, distort_limit=2, shift_limit=0.5) ], p=0.5), RandomBrightnessContrast(p=0.5), Blur(blur_limit=8, p=0.5), ShiftScaleRotate(rotate_limit=20, p=0.5), Resize(img_size, img_size, p=1) ]) val_augmentation = Compose([Resize(img_size, img_size, p=1)]) train_dataset = KDDataset(train_df, y_train, img_size, IMAGE_PATH, id_colname=ID_COLUMNS, transforms=train_augmentation) train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True) val_dataset = KDDataset(val_df, y_val, img_size, IMAGE_PATH, id_colname=ID_COLUMNS, transforms=val_augmentation) val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True) del df, train_dataset, val_dataset gc.collect() with timer('create model'): model = Efficient(num_classes=N_CLASSES, encoder="efficientnet-b3", pool_type="avg") if model_path is not None: model.load_state_dict(torch.load(model_path)) model.to(device) criterion = torch.nn.BCEWithLogitsLoss() optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, eps=1e-4) # model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0) with timer('train'): best_score = 0 best_epoch = 0 for epoch in range(1, epochs + 1): seed_torch(SEED + epoch) if epoch == epochs - 3: for param_group in optimizer.param_groups: param_group['lr'] = param_group['lr'] * 0.1 LOGGER.info("Starting {} epoch...".format(epoch)) tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device, N_CLASSES, cutmix_prob=0.3) LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5))) y_pred, target, val_loss = validate(model, val_loader, criterion, device, N_CLASSES) score = roc_auc_score(target, y_pred) LOGGER.info('Mean val loss: {}'.format(round(val_loss, 5))) LOGGER.info('val score: {}'.format(round(score, 5))) if score > best_score: best_score = score best_epoch = epoch np.save("y_pred.npy", y_pred) torch.save(model.state_dict(), save_path) np.save("target.npy", target) LOGGER.info('best score: {} on epoch: {}'.format( round(best_score, 5), best_epoch)) with timer('predict'): test_df = pd.read_csv(TEST_PATH) test_ids = test_df["id"].values test_augmentation = Compose([Resize(img_size, img_size, p=1)]) test_dataset = KDDatasetTest(test_df, img_size, TEST_IMAGE_PATH, id_colname=ID_COLUMNS, transforms=test_augmentation, n_tta=2) test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True) model.load_state_dict(torch.load(save_path)) pred = predict(model, test_loader, device, N_CLASSES, n_tta=2) print(pred.shape) results = pd.DataFrame({"id": test_ids, "is_star": pred.reshape(-1)}) results.to_csv("results.csv", index=False)
def main(seed): with timer('load data'): df = pd.read_csv(FOLD_PATH) with timer('preprocessing'): train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID] train_augmentation = Compose([ Flip(p=0.5), OneOf([ GridDistortion(p=0.5), OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5) ], p=0.5), OneOf([ RandomGamma(gamma_limit=(100, 140), p=0.5), RandomBrightnessContrast(p=0.5), RandomBrightness(p=0.5), RandomContrast(p=0.5) ], p=0.5), OneOf([ GaussNoise(p=0.5), Cutout(num_holes=10, max_h_size=10, max_w_size=20, p=0.5) ], p=0.5) ]) val_augmentation = None train_dataset = SeverDataset(train_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS, transforms=train_augmentation, crop_rate=1.0) val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS, transforms=val_augmentation) train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=8) val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8) del train_df, val_df, df, train_dataset, val_dataset gc.collect() with timer('create model'): model = smp.Unet('se_resnext50_32x4d', encoder_weights="imagenet", classes=N_CLASSES, encoder_se_module=True, decoder_semodule=True, h_columns=False, skip=True) if base_model is not None: model.load_state_dict(torch.load(base_model)) model.to(device) criterion = ComboLoss({ 'bce': 1, 'dice': 1, 'focal': 1 }, channel_weights=[1, 1, 1, 1]) optimizer = torch.optim.Adam(model.parameters(), lr=3e-4) if base_model is None: scheduler_cosine = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5) scheduler = GradualWarmupScheduler( optimizer, multiplier=1.1, total_epoch=CLR_CYCLE * 2, after_scheduler=scheduler_cosine) else: scheduler = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5) model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0) model = torch.nn.DataParallel(model) with timer('train'): train_losses = [] valid_losses = [] best_model_loss = 999 best_model_ep = 0 checkpoint = base_ckpt + 1 for epoch in range(1, EPOCHS + 1): seed = seed + epoch seed_torch(seed) if epoch % (CLR_CYCLE * 2) == 0: LOGGER.info('Best valid loss: {} on epoch={}'.format( round(best_model_loss, 5), best_model_ep)) checkpoint += 1 best_model_loss = 999 LOGGER.info("Starting {} epoch...".format(epoch)) tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device, cutmix_prob=0.0) train_losses.append(tr_loss) LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5))) valid_loss = validate(model, val_loader, criterion, device) valid_losses.append(valid_loss) LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5))) scheduler.step() if valid_loss < best_model_loss: torch.save( model.module.state_dict(), 'models/{}_fold{}_ckpt{}.pth'.format( EXP_ID, FOLD_ID, checkpoint)) best_model_loss = valid_loss best_model_ep = epoch #np.save("val_pred.npy", val_pred) #del val_pred gc.collect() LOGGER.info('Best valid loss: {} on epoch={}'.format( round(best_model_loss, 5), best_model_ep)) xs = list(range(1, len(train_losses) + 1)) plt.plot(xs, train_losses, label='Train loss') plt.plot(xs, valid_losses, label='Val loss') plt.legend() plt.xticks(xs) plt.xlabel('Epochs') plt.savefig("loss.png")
def log_state(): LOGGER.info(STATE)
def send(self, data): LOGGER.debug("Client send data: %s" % data) send_data = self.wrap_data(data) self.tx_tmp += len(send_data) self.sock.sendto(send_data, self.server_addr)
def __exit__(self, exc_type, exc_value, exc_traceback): """ Context manager exit/destructor """ LOGGER.debug("DB object context exit") self.end()
def run(self): LOGGER.debug("Client run") self.running = True self.handshake_thread = threading.Thread(target=self.handle_handshake) self.handshake_thread.start()
def run(self, command_line): log.debug(f"RUN: {command_line}") return subprocess.call(command_line, shell=True)
def main(): with timer('load data'): df = pd.read_csv(FOLD_PATH) with timer('preprocessing'): train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID] train_augmentation = Compose([ Flip(p=0.5), OneOf( [ #ElasticTransform(p=0.5, alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03), GridDistortion(p=0.5), OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5) ], p=0.5), #OneOf([ # ShiftScaleRotate(p=0.5), ## RandomRotate90(p=0.5), # Rotate(p=0.5) #], p=0.5), OneOf([ Blur(blur_limit=8, p=0.5), MotionBlur(blur_limit=8, p=0.5), MedianBlur(blur_limit=8, p=0.5), GaussianBlur(blur_limit=8, p=0.5) ], p=0.5), OneOf( [ #CLAHE(clip_limit=4, tile_grid_size=(4, 4), p=0.5), RandomGamma(gamma_limit=(100, 140), p=0.5), RandomBrightnessContrast(p=0.5), RandomBrightness(p=0.5), RandomContrast(p=0.5) ], p=0.5), OneOf([ GaussNoise(p=0.5), Cutout(num_holes=10, max_h_size=10, max_w_size=20, p=0.5) ], p=0.5) ]) val_augmentation = None train_dataset = SeverDataset(train_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS, transforms=train_augmentation) val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS, transforms=val_augmentation) train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2) val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2) del train_df, val_df, df, train_dataset, val_dataset gc.collect() with timer('create model'): model = smp.UnetPP('se_resnext50_32x4d', encoder_weights='imagenet', classes=N_CLASSES, encoder_se_module=True, decoder_semodule=True, h_columns=False, deep_supervision=True) model.load_state_dict(torch.load(model_path)) model.to(device) criterion = torch.nn.BCEWithLogitsLoss() optimizer = torch.optim.Adam(model.parameters(), lr=3e-4) scheduler = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5) #scheduler = GradualWarmupScheduler(optimizer, multiplier=1.1, total_epoch=CLR_CYCLE*2, after_scheduler=scheduler_cosine) model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0) with timer('train'): train_losses = [] valid_losses = [] best_model_loss = 999 best_model_ep = 0 checkpoint = 0 for epoch in range(1, EPOCHS + 1): if epoch % (CLR_CYCLE * 2) == 0: if epoch != 0: y_val = y_val.reshape(-1, N_CLASSES, IMG_SIZE[0], IMG_SIZE[1]) best_pred = best_pred.reshape(-1, N_CLASSES, IMG_SIZE[0], IMG_SIZE[1]) for i in range(N_CLASSES): th, score, _, _ = search_threshold( y_val[:, i, :, :], best_pred[:, i, :, :]) LOGGER.info( 'Best loss: {} Best Dice: {} on epoch {} th {} class {}' .format(round(best_model_loss, 5), round(score, 5), best_model_ep, th, i)) checkpoint += 1 best_model_loss = 999 LOGGER.info("Starting {} epoch...".format(epoch)) tr_loss = train_one_epoch_dsv(model, train_loader, criterion, optimizer, device) train_losses.append(tr_loss) LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5))) valid_loss, val_pred, y_val = validate_dsv(model, val_loader, criterion, device) valid_losses.append(valid_loss) LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5))) scheduler.step() if valid_loss < best_model_loss: torch.save( model.state_dict(), '{}_fold{}_ckpt{}.pth'.format(EXP_ID, FOLD_ID, checkpoint)) best_model_loss = valid_loss best_model_ep = epoch best_pred = val_pred del val_pred gc.collect() with timer('eval'): y_val = y_val.reshape(-1, N_CLASSES, IMG_SIZE[0], IMG_SIZE[1]) best_pred = best_pred.reshape(-1, N_CLASSES, IMG_SIZE[0], IMG_SIZE[1]) for i in range(N_CLASSES): th, score, _, _ = search_threshold(y_val[:, 
i, :, :], best_pred[:, i, :, :]) LOGGER.info( 'Best loss: {} Best Dice: {} on epoch {} th {} class {}'. format(round(best_model_loss, 5), round(score, 5), best_model_ep, th, i)) xs = list(range(1, len(train_losses) + 1)) plt.plot(xs, train_losses, label='Train loss') plt.plot(xs, valid_losses, label='Val loss') plt.legend() plt.xticks(xs) plt.xlabel('Epochs') plt.savefig("loss.png")
def __enter__(self): """ Context manager enter/constructor """ LOGGER.debug("DB object context enter") return self
def main(): train_df = pd.read_csv(TRAIN_PATH).sample(train_size+valid_size, random_state=seed) y = np.where(train_df['target'] >= 0.5, 1, 0) y_aux = train_df[AUX_COLUMNS].values identity_columns_new = [] for column in identity_columns + ['target']: train_df[column + "_bin"] = np.where(train_df[column] >= 0.5, True, False) if column != "target": identity_columns_new.append(column + "_bin") weights = np.ones((len(train_df),)) / 4 weights += (train_df[identity_columns].fillna(0).values >= 0.5).sum(axis=1).astype(bool).astype(np.int) / 4 weights += (((train_df["target"].values >= 0.5).astype(bool).astype(np.int) + (train_df[identity_columns].fillna(0).values < 0.5).sum(axis=1).astype(bool).astype(np.int)) > 1).astype( bool).astype(np.int) / 4 weights += (((train_df["target"].values < 0.5).astype(bool).astype(np.int) + (train_df[identity_columns].fillna(0).values >= 0.5).sum(axis=1).astype(bool).astype(np.int)) > 1).astype( bool).astype(np.int) / 4 loss_weight = 1.0 / weights.mean() with timer('preprocessing text'): #df["comment_text"] = [analyzer_embed(text) for text in df["comment_text"]] train_df['comment_text'] = train_df['comment_text'].astype(str) train_df = train_df.fillna(0) with timer('load embedding'): tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH, cache_dir=None, do_lower_case=True) X_text, train_lengths = convert_lines(train_df["comment_text"].fillna("DUMMY_VALUE"), max_len, tokenizer) test_df = train_df[train_size:] with timer('train'): X_train, y_train, y_aux_train, w_train = X_text[:train_size], y[:train_size], y_aux[:train_size], weights[ :train_size] X_val, y_val, y_aux_val, w_val = X_text[train_size:], y[train_size:], y_aux[train_size:], weights[ train_size:] trn_lengths, val_lengths = train_lengths[:train_size], train_lengths[train_size:] model = BertForSequenceClassification.from_pretrained(WORK_DIR, cache_dir=None, num_labels=n_labels) model.zero_grad() model = model.to(device) y_train = np.concatenate((y_train.reshape(-1, 1), w_train.reshape(-1, 1), y_aux_train), axis=1) y_val = np.concatenate((y_val.reshape(-1, 1), w_val.reshape(-1, 1), y_aux_val), axis=1) train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.long), torch.tensor(y_train, dtype=torch.float)) valid = torch.utils.data.TensorDataset(torch.tensor(X_val, dtype=torch.long), torch.tensor(y_val, dtype=torch.float)) ran_sampler = torch.utils.data.RandomSampler(train_dataset) len_sampler = LenMatchBatchSampler(ran_sampler, batch_size=batch_size, drop_last=False) train_loader = torch.utils.data.DataLoader(train_dataset, batch_sampler=len_sampler) valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size * 2, shuffle=False) param_optimizer = list(model.named_parameters()) no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] optimizer_grouped_parameters = [ {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01}, {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0} ] num_train_optimization_steps = int(epochs * train_size / batch_size / accumulation_steps) total_step = int(epochs * train_size / batch_size) optimizer = BertAdam(optimizer_grouped_parameters, lr=2e-5, warmup=0.05, t_total=num_train_optimization_steps) model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0) #criterion = torch.nn.BCEWithLogitsLoss().to(device) criterion = CustomLoss(loss_weight).to(device) LOGGER.info(f"Starting 1 epoch...") tr_loss, train_losses = 
train_one_epoch(model, train_loader, criterion, optimizer, device, accumulation_steps, total_step, n_labels) LOGGER.info(f'Mean train loss: {round(tr_loss,5)}') torch.save(model.state_dict(), '{}_dic'.format(exp)) valid_loss, oof_pred = validate(model, valid_loader, criterion, device, n_labels) del model gc.collect() torch.cuda.empty_cache() test_df["pred"] = oof_pred[:, 0] test_df = convert_dataframe_to_bool(test_df) bias_metrics_df = compute_bias_metrics_for_model(test_df, identity_columns) LOGGER.info(bias_metrics_df) score = get_final_metric(bias_metrics_df, calculate_overall_auc(test_df)) LOGGER.info(f'final score is {score}') test_df.to_csv("oof.csv", index=False) xs = list(range(1, len(train_losses) + 1)) plt.plot(xs, train_losses, label='Train loss'); plt.legend(); plt.xticks(xs); plt.xlabel('Iter') plt.savefig("loss.png")
def main(): with timer('load data'): df = pd.read_csv(TRAIN_PATH)[:10] df = df[df.Image != "ID_6431af929"].reset_index(drop=True) df.loc[df.pre_SOPInstanceUID == "ID_6431af929", "pre1_SOPInstanceUID"] = df.loc[df.pre_SOPInstanceUID == "ID_6431af929", "Image"] df.loc[df.post_SOPInstanceUID == "ID_6431af929", "post1_SOPInstanceUID"] = df.loc[df.post_SOPInstanceUID == "ID_6431af929", "Image"] df.loc[df.prepre_SOPInstanceUID == "ID_6431af929", "pre2_SOPInstanceUID"] = df.loc[df.prepre_SOPInstanceUID == "ID_6431af929", "pre1_SOPInstanceUID"] df.loc[df.postpost_SOPInstanceUID == "ID_6431af929", "post2_SOPInstanceUID"] = df.loc[df.postpost_SOPInstanceUID == "ID_6431af929", "post1_SOPInstanceUID"] df = df[[ "Image", "pre1_SOPInstanceUID", "post1_SOPInstanceUID", "pre2_SOPInstanceUID", "post2_SOPInstanceUID" ]] ids = df["Image"].values gc.collect() with timer('preprocessing'): test_augmentation = Compose([ CenterCrop(512 - 50, 512 - 50, p=1.0), Resize(img_size, img_size, p=1) ]) test_dataset = RSNADatasetTest(df, img_size, IMAGE_PATH, id_colname=ID_COLUMNS, transforms=test_augmentation, black_crop=False, three_window=True, rescaling=False, pick_type="post_post", n_tta=N_TTA) test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=16, pin_memory=True) del df, test_dataset gc.collect() with timer('create model'): model = CnnModel(num_classes=N_CLASSES, encoder="se_resnext50_32x4d", pretrained="imagenet", pool_type="avg") model.load_state_dict(torch.load(model_path)) model.to(device) model = torch.nn.DataParallel(model) with timer('predict'): pred = predict(model, test_loader, device, n_tta=N_TTA) pred = np.clip(pred, 1e-6, 1 - 1e-6) with timer('sub'): sub = pd.DataFrame(pred, columns=TARGET_COLUMNS) sub["ID"] = ids sub = sub.set_index("ID") sub = sub.unstack().reset_index() sub["ID"] = sub["ID"] + "_" + sub["level_0"] sub = sub.rename(columns={0: "Label"}) sub = sub.drop("level_0", axis=1) LOGGER.info(sub.head()) sub.to_csv("../output/{}_train.csv".format(EXP_ID), index=False)
def timer(name): t0 = time.time() yield LOGGER.info('[{}] done in {} s'.format(name, round(time.time() - t0, 2)))
def train_lgbm(X_train, y_train, X_valid, y_valid, X_test,
               categorical_features, feature_name, fold_id, lgb_params,
               fit_params, model_name, loss_func, rank=False,
               calc_importances=True):
    train = lgb.Dataset(X_train, y_train,
                        categorical_feature=categorical_features,
                        feature_name=feature_name)
    if X_valid is not None:
        valid = lgb.Dataset(X_valid, y_valid,
                            categorical_feature=categorical_features,
                            feature_name=feature_name)
    evals_result = {}
    if X_valid is not None:
        model = lgb.train(
            lgb_params,
            train,
            valid_sets=[valid],
            valid_names=['valid'],
            evals_result=evals_result,
            **fit_params
        )
    else:
        model = lgb.train(
            lgb_params,
            train,
            evals_result=evals_result,
            **fit_params
        )
    LOGGER.info(f'Best Iteration: {model.best_iteration}')

    # train score
    if X_valid is None:
        y_pred_train = model.predict(X_train, num_iteration=fit_params["num_boost_round"])
        y_pred_train[y_pred_train < 0] = 0
        train_loss = loss_func(y_train, y_pred_train)
    else:
        y_pred_train = model.predict(X_train, num_iteration=model.best_iteration)
        y_pred_train[y_pred_train < 0] = 0
        train_loss = loss_func(y_train, y_pred_train)

    if X_valid is not None:
        # validation score
        y_pred_valid = model.predict(X_valid)
        y_pred_valid[y_pred_valid < 0] = 0
        valid_loss = loss_func(y_valid, y_pred_valid)
        # save prediction
        #np.save(f'{model_name}_train_fold{fold_id}.npy', y_pred_valid)
    else:
        y_pred_valid = None
        valid_loss = None

    # save model
    # TODO: needs editing
    model.save_model(os.path.join(f'../output/{model_name}',
                                  f'{model_name}_fold{fold_id}.txt'))

    if X_test is not None:
        # predict test
        y_pred_test = model.predict(X_test)
        y_pred_test[y_pred_test < 0] = 0
        # save prediction
        #np.save(f'{model_name}_test_fold{fold_id}.npy', y_pred_test)
    else:
        y_pred_test = None

    if calc_importances:
        importances = pd.DataFrame()
        importances['feature'] = feature_name
        importances['gain'] = model.feature_importance(importance_type='gain')
        importances['split'] = model.feature_importance(importance_type='split')
        importances['fold'] = fold_id
    else:
        importances = None

    return y_pred_valid, y_pred_test, train_loss, valid_loss, importances, model.best_iteration
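# Hedged usage sketch for train_lgbm on a toy regression problem. lgb_params and
# fit_params below are illustrative assumptions, and the output directory
# ../output/demo must exist because train_lgbm saves the model there.
import numpy as np

X_tr, y_tr = np.random.rand(200, 5), np.random.rand(200)
X_va, y_va = np.random.rand(50, 5), np.random.rand(50)
feature_name = [f'f{i}' for i in range(5)]

y_pred_valid, y_pred_test, tr_loss, va_loss, imps, best_iter = train_lgbm(
    X_tr, y_tr, X_va, y_va, None,
    categorical_features=[], feature_name=feature_name, fold_id=0,
    lgb_params={'objective': 'regression', 'metric': 'rmse', 'verbosity': -1},
    fit_params={'num_boost_round': 100},
    model_name='demo',
    loss_func=lambda y, p: float(np.sqrt(np.mean((y - p) ** 2))),  # RMSE
)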
import json
import logging
import re
import datetime
import urlparse
import os

import bottle

from util import get_hpfeeds_client, get_ext_ip
from commands import perform_commands

LOGGER = logging.getLogger(__name__)

shellshock_re = re.compile(r'\(\s*\)\s*{')

# this is the default apache page
with open(os.path.join(os.path.dirname(__file__), 'template.html')) as f:
    page_template = f.read()

app = bottle.default_app()
LOGGER.info('Loading config file shockpot.conf ...')
app.config.load_config(os.path.join(os.path.dirname(__file__), 'shockpot.conf'))

hpclient = get_hpfeeds_client(app.config)

public_ip = None
if app.config['fetch_public_ip.enabled'].lower() == 'true':
    public_ip = get_ext_ip(json.loads(app.config['fetch_public_ip.urls']))
LOGGER.info('public_ip = %s', public_ip)


def is_shellshock(headers):
    for name, value in headers:
        if shellshock_re.search(value):
            return True
    return False
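# Worked example of the Shellshock detector above: the regex looks for the
# "() {" function-definition prologue anywhere in a header value, which is the
# signature of CVE-2014-6271 payloads. The header tuples below are hypothetical.
headers = [('User-Agent', '() { :;}; /bin/bash -c "id"')]
print(is_shellshock(headers))                          # True
print(is_shellshock([('User-Agent', 'curl/7.68.0')]))  # False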
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)

    with timer('preprocessing'):
        val_df = df[df.fold_id == FOLD_ID]

        val_augmentation = None
        val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES,
                                   id_colname=ID_COLUMNS,
                                   transforms=val_augmentation)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                                shuffle=False, num_workers=8)

        del val_df, df, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp.Unet('resnet34', encoder_weights="imagenet",
                         classes=N_CLASSES, encoder_se_module=True,
                         decoder_semodule=True, h_columns=False, skip=True,
                         act="swish", freeze_bn=True,
                         classification=CLASSIFICATION, attention_type="cbam")
        model.load_state_dict(torch.load(base_model))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()

    with timer('predict'):
        valid_loss, y_pred, y_true, cls = predict(
            model, val_loader, criterion, device,
            classification=CLASSIFICATION)
        LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

        scores = []
        for i, (th, remove_mask_pixel) in enumerate(zip(ths, remove_pixels)):
            sum_val_preds = np.sum(
                y_pred[:, i, :, :].reshape(len(y_pred), -1) > th, axis=1)
            cls_ = cls[:, i]

            best = 0
            for th_cls in np.linspace(0, 1, 101):
                val_preds_ = copy.deepcopy(y_pred[:, i, :, :])
                val_preds_[sum_val_preds < remove_mask_pixel] = 0
                val_preds_[cls_ <= th_cls] = 0
                # keep per-sample dice scores in a separate list so the
                # per-class accumulator `scores` is not clobbered
                scores_ = []
                for y_val_, y_pred_ in zip(y_true[:, i, :, :], val_preds_):
                    score = dice(y_val_, y_pred_ > 0.5)
                    if np.isnan(score):
                        scores_.append(1)
                    else:
                        scores_.append(score)
                if np.mean(scores_) >= best:
                    best = np.mean(scores_)
                    best_th = th_cls
                else:
                    break
            LOGGER.info('dice={} on {}'.format(best, best_th))
            scores.append(best)

        LOGGER.info('holdout dice={}'.format(np.mean(scores)))
def main(): train_df = pd.read_csv(TRAIN_PATH) fold_df = pd.read_csv(FOLD_PATH) n_train_df = len(train_df) old_folds = pd.read_csv(FOLD_PATH_JIGSAW) old_df = pd.read_csv(OLD_PATH) old_df["target"] = old_df[["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]].sum(axis=1) old_df["target"] = (old_df["target"] >= 1).astype("int8") old_df = old_df[old_folds.fold_id != fold_id] train_df = train_df.append(old_df).reset_index(drop=True) del old_folds, old_df gc.collect() # y = np.where(train_df['target'] >= 0.5, 1, 0) y = train_df['target'].values identity_columns_new = [] for column in identity_columns + ['target']: train_df[column + "_bin"] = np.where(train_df[column] >= 0.5, True, False) if column != "target": identity_columns_new.append(column + "_bin") # Overall #weights = np.ones((len(train_df),)) / 4 # Subgroup #weights += (train_df[identity_columns].fillna(0).values >= 0.5).sum(axis=1).astype(bool).astype(np.int) / 4 # Background Positive, Subgroup Negative #weights += (((train_df["target"].values >= 0.5).astype(bool).astype(np.int) + # (1 - (train_df[identity_columns].fillna(0).values >= 0.5).sum(axis=1).astype(bool).astype( # np.int))) > 1).astype(bool).astype(np.int) / 4 # Background Negative, Subgroup Positive #weights += (((train_df["target"].values < 0.5).astype(bool).astype(np.int) + # (train_df[identity_columns].fillna(0).values >= 0.5).sum(axis=1).astype(bool).astype( # np.int)) > 1).astype(bool).astype(np.int) / 4 #loss_weight = 0.5 with timer('preprocessing text'): # df["comment_text"] = [analyzer_embed(text) for text in df["comment_text"]] train_df['comment_text'] = train_df['comment_text'].astype(str) train_df = train_df.fillna(0) with timer('load embedding'): tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH, cache_dir=None, do_lower_case=True) X_text, train_lengths = convert_lines(train_df["comment_text"].fillna("DUMMY_VALUE"), max_len, tokenizer) del train_lengths, tokenizer gc.collect() LOGGER.info(f"X_text {X_text.shape}") X_old = X_text[n_train_df:].astype("int32") X_text = X_text[:n_train_df].astype("int32") #w_trans = weights[n_train_df:].astype("float32") #weights = weights[:n_train_df].astype("float32") y_old = y[n_train_df:].astype("float32") y = y[:n_train_df].astype("float32") train_df = train_df[:n_train_df] with timer('train'): train_index = fold_df.fold_id != fold_id valid_index = fold_df.fold_id == fold_id X_train, y_train = X_text[train_index].astype("int32"), y[train_index].astype("float32") X_val, y_val = X_text[valid_index].astype("int32"), y[valid_index].astype("float32") test_df = train_df[valid_index] del X_text, y, train_index, valid_index, train_df gc.collect() model = BertForSequenceClassification.from_pretrained(WORK_DIR, cache_dir=None, num_labels=n_labels) model.zero_grad() model = model.to(device) X_train = np.concatenate([X_train, X_old], axis=0) y_train = np.concatenate([y_train, y_old], axis=0) train_size = len(X_train) del X_old, y_old gc.collect() train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.long), torch.tensor(y_train, dtype=torch.float32)) valid = torch.utils.data.TensorDataset(torch.tensor(X_val, dtype=torch.long), torch.tensor(y_val, dtype=torch.float32)) ran_sampler = torch.utils.data.RandomSampler(train_dataset) len_sampler = LenMatchBatchSampler(ran_sampler, batch_size=batch_size, drop_last=False) train_loader = torch.utils.data.DataLoader(train_dataset, batch_sampler=len_sampler) valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size * 2, 
shuffle=False) del X_train, y_train, X_val, y_val gc.collect() LOGGER.info(f"done data loader setup") param_optimizer = list(model.named_parameters()) no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] optimizer_grouped_parameters = [ {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01}, {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0} ] num_train_optimization_steps = int(epochs * train_size / batch_size / accumulation_steps) total_step = int(epochs * train_size / batch_size) optimizer = BertAdam(optimizer_grouped_parameters, lr=base_lr, warmup=0.005, t_total=num_train_optimization_steps) LOGGER.info(f"done optimizer loader setup") model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0) criterion = torch.nn.BCEWithLogitsLoss().to(device) #criterion = CustomLoss(loss_weight).to(device) LOGGER.info(f"done amp setup") for epoch in range(epochs): LOGGER.info(f"Starting {epoch} epoch...") LOGGER.info(f"length {train_size} train...") if epoch == 1: for param_group in optimizer.param_groups: param_group['lr'] = base_lr * gammas[1] tr_loss, train_losses = train_one_epoch(model, train_loader, criterion, optimizer, device, accumulation_steps, total_step, n_labels, base_lr, gamma=gammas[2 * epoch]) LOGGER.info(f'Mean train loss: {round(tr_loss,5)}') torch.save(model.state_dict(), '{}_dic_epoch{}'.format(exp, epoch)) torch.save(optimizer.state_dict(), '{}_optimizer_epoch{}.pth'.format(exp, epoch)) valid_loss, oof_pred = validate(model, valid_loader, criterion, device, n_labels) LOGGER.info(f'Mean valid loss: {round(valid_loss,5)}') if epochs > 1: test_df_cp = test_df.copy() test_df_cp["pred"] = oof_pred[:, 0] test_df_cp = convert_dataframe_to_bool(test_df_cp) bias_metrics_df = compute_bias_metrics_for_model(test_df_cp, identity_columns) LOGGER.info(bias_metrics_df) score = get_final_metric(bias_metrics_df, calculate_overall_auc(test_df_cp)) LOGGER.info(f'score is {score}') del model gc.collect() torch.cuda.empty_cache() test_df["pred"] = oof_pred[:, 0] test_df = convert_dataframe_to_bool(test_df) bias_metrics_df = compute_bias_metrics_for_model(test_df, identity_columns) LOGGER.info(bias_metrics_df) score = get_final_metric(bias_metrics_df, calculate_overall_auc(test_df)) LOGGER.info(f'final score is {score}') test_df.to_csv("oof.csv", index=False) xs = list(range(1, len(train_losses) + 1)) plt.plot(xs, train_losses, label='Train loss'); plt.legend(); plt.xticks(xs); plt.xlabel('Iter') plt.savefig("loss.png")
def get_redgifs_gif(query: str, username: str, after_dark_only: bool = False) -> Optional[str]: """ Fetch a special kind of gif, if you know what I mean ;). :param str query: Gif search query. :param str username: Chatango user who triggered the command. :param bool after_dark_only: Whether results should be limited to the `after dark` timeframe. :returns: Optional[str] """ try: night_mode = is_after_dark() if (after_dark_only and night_mode) or after_dark_only is False: token = redgifs_auth_token() endpoint = REDGIFS_IMAGE_SEARCH_ENDPOINT params = { "search_text": query.title(), "order": "trending", "count": 80 } headers = {"Authorization": f"Bearer {token}"} resp = requests.get(endpoint, params=params, headers=headers) results = resp.json().get("gifs", None) if resp.status_code == 200 and results is not None: results = [ result for result in results if result["urls"].get("sd") is not None ] if bool(results): rand = randint(0, len(results) - 1) image_json = results[rand] return get_full_gif_metadata(image_json) elif username == "thegreatpizza": return emojize( f":pizza: *h* wow pizza ur taste in lesbians is so dank that I coughldnt find nething sry :( *h* :pizza:", use_aliases=True, ) elif username == "broiestbro": return emojize( f":@ bro u fgt wot r u searching 4 go2bed :@", use_aliases=True, ) else: return emojize( f":warning: wow @{username} u must b a freak tf r u even searching foughr jfc :warning:", use_aliases=True, ) else: LOGGER.error( f"Error {resp.status_code} fetching NSFW gif: {resp.content}" ) return emojize( f":warning: omfg @{username} u broke bot with ur kinky ass bs smfh :warning:", use_aliases=True, ) return "https://i.imgur.com/oGMHkqT.jpg" except HTTPError as e: LOGGER.warning( f"HTTPError while fetching nsfw image for `{query}`: {e.response.content}" ) return emojize( f":warning: yea nah idk wtf ur searching for :warning:", use_aliases=True, ) except IndexError as e: LOGGER.warning( f"IndexError while fetching nsfw image for `{query}`: {e}") return emojize( f":warning: yea nah idk wtf ur searching for :warning:", use_aliases=True, ) except Exception as e: LOGGER.warning( f"Unexpected error while fetching nsfw image for `{query}`: {e}") return emojize( f":warning: dude u must b a freak cuz that just broke bot :warning:", use_aliases=True, )
def on_exchange_declareok(self, _unused_frame, userdata): LOGGER.info('Exchange declared: %s', userdata) self.setup_queue(self.QUEUE)
def main():
    train_df = pd.read_csv(TRAIN_PATH)
    # Pre-computed identity labels, one .npy file per subgroup column.
    for column in ['male', 'female', 'homosexual_gay_or_lesbian', 'christian',
                   'jewish', 'muslim', 'black', 'white',
                   'psychiatric_or_mental_illness']:
        train_df[column] = np.load(
            "../input/identity-column-data/%s_labeled.npy" % column)
    fold_df = pd.read_csv(FOLD_PATH)

    # y = np.where(train_df['target'] >= 0.5, 1, 0)
    y = train_df['target'].values
    y_aux = train_df[AUX_COLUMNS].values

    identity_columns_new = []
    for column in identity_columns + ['target']:
        train_df[column + "_bin"] = train_df[column] >= 0.5
        if column != "target":
            identity_columns_new.append(column + "_bin")

    # Sample weights for the bias-aware loss: overall, subgroup,
    # background-positive/subgroup-negative and
    # background-negative/subgroup-positive terms, 1/4 each.
    # (np.int was removed from NumPy; plain int is equivalent here.)
    target_pos = train_df['target'].values >= 0.5
    in_subgroup = (train_df[identity_columns].fillna(0).values >= 0.5).any(axis=1)
    # Overall
    weights = np.ones((len(train_df), )) / 4
    # Subgroup
    weights += in_subgroup.astype(int) / 4
    # Background Positive, Subgroup Negative
    weights += (target_pos & ~in_subgroup).astype(int) / 4
    # Background Negative, Subgroup Positive
    weights += (~target_pos & in_subgroup).astype(int) / 4
    loss_weight = 0.5

    with timer('preprocessing text'):
        # df["comment_text"] = [analyzer_embed(text) for text in df["comment_text"]]
        train_df['comment_text'] = train_df['comment_text'].astype(str)
        train_df = train_df.fillna(0)

    with timer('load embedding'):
        tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH,
                                                  cache_dir=None,
                                                  do_lower_case=False)
        X_text = convert_lines_head_tail(
            train_df["comment_text"].fillna("DUMMY_VALUE"),
            max_len, head_len, tokenizer)
        del tokenizer
        gc.collect()
    LOGGER.info(f"X_text {X_text.shape}")

    with timer('train'):
        train_index = fold_df.fold_id != fold_id
        valid_index = fold_df.fold_id == fold_id
        X_train = X_text[train_index].astype("int32")
        y_train, y_aux_train, w_train = (y[train_index], y_aux[train_index],
                                         weights[train_index])
        X_val = X_text[valid_index].astype("int32")
        y_val, y_aux_val, w_val = (y[valid_index], y_aux[valid_index],
                                   weights[valid_index])
        test_df = train_df[valid_index]
        del X_text, y, y_aux, weights, train_index, valid_index, train_df
        gc.collect()

        model = BertForSequenceClassification(bert_config, num_labels=n_labels)
        model.load_state_dict(torch.load(model_path))
        model.zero_grad()
        model = model.to(device)

        # Pack [target, sample weight, aux targets] into one array so the
        # custom loss can slice them apart per batch.
        y_train = np.concatenate(
            (y_train.reshape(-1, 1), w_train.reshape(-1, 1), y_aux_train),
            axis=1).astype("float32")
        y_val = np.concatenate(
            (y_val.reshape(-1, 1), w_val.reshape(-1, 1), y_aux_val),
            axis=1).astype("float32")

        train_dataset = torch.utils.data.TensorDataset(
            torch.tensor(X_train, dtype=torch.long),
            torch.tensor(y_train, dtype=torch.float32))
        valid = torch.utils.data.TensorDataset(
            torch.tensor(X_val, dtype=torch.long),
            torch.tensor(y_val, dtype=torch.float32))

        ran_sampler = torch.utils.data.RandomSampler(train_dataset)
        len_sampler = LenMatchBatchSampler(ran_sampler,
                                           batch_size=batch_size,
                                           drop_last=False)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_sampler=len_sampler)
        valid_loader = torch.utils.data.DataLoader(valid,
                                                   batch_size=batch_size * 2,
                                                   shuffle=False)
        LOGGER.info("done data loader setup")

        # No weight decay for biases and LayerNorm parameters.
        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in param_optimizer
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0},
        ]
        num_train_optimization_steps = int(epochs * len(X_train) / batch_size /
                                           accumulation_steps)
        total_step = int(epochs * len(X_train) / batch_size)

        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=base_lr,
                             warmup=0.005,
                             t_total=num_train_optimization_steps)
        LOGGER.info("done optimizer setup")

        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level="O1", verbosity=0)
        # criterion = torch.nn.BCEWithLogitsLoss().to(device)
        criterion = CustomLoss(loss_weight).to(device)
        LOGGER.info("done amp setup")

        for epoch in range(1, epochs + 1):
            LOGGER.info(f"Starting epoch {epoch}...")
            LOGGER.info(f"{len(X_train)} train / {len(X_val)} valid samples")

            if epoch == 1:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = base_lr * gammas[1]
            tr_loss, train_losses = train_one_epoch(model, train_loader,
                                                    criterion, optimizer,
                                                    device,
                                                    accumulation_steps,
                                                    total_step, n_labels,
                                                    base_lr,
                                                    gamma=gammas[2 * epoch])
            LOGGER.info(f'Mean train loss: {round(tr_loss, 5)}')

            torch.save(model.state_dict(),
                       '{}_epoch{}_fold{}.pth'.format(exp, epoch, fold_id))

            valid_loss, oof_pred = validate(model, valid_loader, criterion,
                                            device, n_labels)
            LOGGER.info(f'Mean valid loss: {round(valid_loss, 5)}')

        del model
        gc.collect()
        torch.cuda.empty_cache()

    # Out-of-fold evaluation with the competition bias metrics.
    test_df["pred"] = oof_pred[:, 0]
    test_df = convert_dataframe_to_bool(test_df)
    bias_metrics_df = compute_bias_metrics_for_model(test_df, identity_columns)
    LOGGER.info(bias_metrics_df)

    score = get_final_metric(bias_metrics_df, calculate_overall_auc(test_df))
    LOGGER.info(f'final score is {score}')

    test_df.to_csv("oof.csv", index=False)

    xs = list(range(1, len(train_losses) + 1))
    plt.plot(xs, train_losses, label='Train loss')
    plt.legend()
    plt.xticks(xs)
    plt.xlabel('Iter')
    plt.savefig("loss.png")
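# main() above relies on LenMatchBatchSampler, which is defined elsewhere in
# the project. The following is a minimal sketch of such a length-matching
# sampler, not the original implementation: the pad id of 0, the 64-token
# bucket width, and the (tokens, target) dataset layout are assumptions.
# Batching sequences of similar unpadded length keeps per-batch padding, and
# therefore wasted compute, low.
import torch

class LenMatchBatchSampler(torch.utils.data.BatchSampler):
    def __iter__(self):
        buckets = {}
        for idx in self.sampler:
            seq = self.sampler.data_source[idx][0]
            # Bucket by unpadded length, in steps of 64 tokens (pad id 0).
            bucket_id = int((seq != 0).sum().item()) // 64
            buckets.setdefault(bucket_id, []).append(idx)
            if len(buckets[bucket_id]) == self.batch_size:
                yield buckets.pop(bucket_id)
        # Flush partially filled buckets, respecting drop_last.
        leftover = [idx for bucket in buckets.values() for idx in bucket]
        for start in range(0, len(leftover), self.batch_size):
            batch = leftover[start:start + self.batch_size]
            if len(batch) == self.batch_size or not self.drop_last:
                yield batch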
def __del__(self):
    LOGGER.info("RemoteController exited")
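# CPython only runs __del__ when the object happens to be finalized, so the
# "exited" message above may never be logged at interpreter shutdown. A
# context manager makes the lifecycle log deterministic; this is a sketch,
# with RemoteController's actual internals assumed away:
class RemoteController:
    def __enter__(self):
        LOGGER.info("RemoteController started")
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        LOGGER.info("RemoteController exited")
        return False  # do not swallow exceptions

# usage:
# with RemoteController() as controller:
#     ...  # exit is logged even if this block raises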
def mri_wrapper(ctx, input_folder, from_loris=False):
    # get config.json
    config = ctx.obj['cfgjson']
    # get the folders from config.json
    script_parallel_path = os.path.abspath('./mri_run_parallel')
    script_merge_path = os.path.abspath('./mri_output_merge')
    mri_raw_root = os.path.abspath(
        config['mri']['input_folders']['nifti']['raw'])
    mri_raw_folder = os.path.join(mri_raw_root, input_folder)
    mri_input_root = os.path.abspath(
        config['mri']['input_folders']['nifti']['organized'])
    mri_input_folder = os.path.join(mri_input_root, input_folder)
    imaging_root = os.path.abspath(config['mipmap']['input_folder']['imaging'])
    imaging_source_path = os.path.join(imaging_root, input_folder)
    mri_output_spm12_root = config['mri']['output_folders']['spm12']
    mri_output_spm12_folder = os.path.join(mri_output_spm12_root, input_folder)

    if not from_loris:
        # Reorganize the raw NIFTI files into the expected layout.
        LOGGER.info('Reorganizing nifti files in folder %s' % mri_input_folder)
        run_cmd = 'python2 mri_nifti_reorganize/organizer.py %s %s' % (
            mri_raw_folder, mri_input_folder)
        os.system(run_cmd)
    else:
        LOGGER.info('Skipping NIFTI reorganization step, '
                    'files already organized by LORIS-for-MIP')

    # run the matlab spm12 script
    LOGGER.info('Running spm12 pipeline...')
    LOGGER.info('Storing output files in %s' % mri_output_spm12_folder)
    os.chdir(script_parallel_path)
    run_cmd = 'python2 mri_parallel_preprocessing.py %s %s' % (
        mri_input_folder, mri_output_spm12_folder)
    LOGGER.info('Executing...%s' % run_cmd)
    os.system(run_cmd)

    # merge the output into one csv
    os.chdir(script_merge_path)
    run_cmd = 'python2 merge.py %s %s' % (mri_output_spm12_folder,
                                          imaging_source_path)
    LOGGER.info('Merging spm12 output into a single csv file in folder %s'
                % imaging_source_path)
    os.system(run_cmd)
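# os.system() ignores non-zero exit codes and breaks on paths containing
# spaces. A subprocess-based helper would surface failures from each pipeline
# step; this is a sketch, reusing the script names above unchanged:
import subprocess

def run_step(*cmd):
    """Run one pipeline command, logging it and raising on failure."""
    LOGGER.info('Executing...%s' % ' '.join(cmd))
    subprocess.run(cmd, check=True)

# e.g. instead of os.system(run_cmd):
# run_step('python2', 'mri_parallel_preprocessing.py',
#          mri_input_folder, mri_output_spm12_folder)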