Example #1
def log_request(record):
    global hpclient
    req = json.dumps(record)
    LOGGER.info(req)

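    # publish to the configured hpfeeds channel only for requests flagged as shellshock attempts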
    if hpclient and record['is_shellshock']:
        hpclient.publish(app.config['hpfeeds.channel'], req)
Example #2
 def get_custom_field(self, field_key):
     result = ''
     try:
         result = self.custom_fields[field_key]
     except KeyError:
         LOGGER.warn('Issue %s does not have value for field %s' % (self.key, field_key))
     return result
Example #3
 def end_suite(self, suite):
     self._separator('SUITE')
     self._end('SUITE', suite.longname, suite.elapsedtime)
     self._separator('SUITE')
     if self._indent == 0:
         LOGGER.output_file('Debug', self._file.name)
         self.close()
Example #4
 def end_suite(self, suite):
     LOGGER.end_suite(suite)
     if self._xmllogger.ended_output:
         LOGGER.output_file('Output', self._xmllogger.ended_output)
         orig_outpath = self._settings['Output']
         suite.context.output_file_changed(orig_outpath)
         self._create_split_log(self._xmllogger.ended_output, suite)
Example #5
 def update_issue_status(status_list):
     for item in status_list:
         status = item['status'].upper()
         key = item['key']
         update_query = DbJiraIssues.update(dc_status=status).where(DbJiraIssues.key == key)
         update_query.execute()
         LOGGER.debug(update_query)
Example #6
    def rename(self, path, path1):
        # Rename is handled by copying and deleting files...
        LOGGER.debug("rename %s %s" % (path, path1))
        d = self.get_dir(path)

        if self.is_valid_file(path) and d.is_file(path):
            if not self.is_valid_file(path1):
                # from a valid file to an editor file               
                buf = self.get_file_buf(path1)
                buf.write(d.read_file(path))
                # TODO : remove path ?
            else:
                # from a valid file to a valid file
                # if rename is defined 
                # TODO : with unlink method defined in fs
                pass
        elif not self.is_valid_file(path):
            if self.is_valid_file(path1) and d.is_file(path1):
                # from an editor file to a valid file
                buf = self.get_file_buf(path)
                ret = d.write_to(path1, buf.getvalue())
                self.open_mode = None
                self.remove_file_buf(path)
                if ret == False:
                    return -errno.EIO
            elif not self.is_valid_file(path):
                # from an editor file to an editor file
                # TODO
                pass
Example #7
    def checkTrame(self):
        if self.trameUsed:
            LOGGER.debug("Trame received : {}".format(self.trameUsed.lessRawView()))
            if ("A55A" not in self.trameUsed.sep):
                LOGGER.warn("Wrong separator, rejected")

            if (self.doChecksum(self.trameUsed) not in self.trameUsed.checkSum):     
                # Bad checksum
                LOGGER.warn("Wrong checksum, expected : {}, rejected".format(self.doChecksum(self.trameUsed)))

            with self.lock:
                if (self.trameUsed.ident in self.identSet):
                    # Fetch the sensor from the database
                    sensorUsed = sensor.Sensor.objects(physic_id=self.trameUsed.ident)[0]
                    newData = '' # the new value to store in the database, dynamically typed
                    if (sensorUsed.__class__.__name__=="Switch"):
                        newData=sensorUsed.translateTrame(self.trameUsed)
                    elif (sensorUsed.__class__.__name__=="Temperature"):
                        newData = sensorUsed.translateTrame(self.trameUsed)

                    elif (sensorUsed.__class__.__name__=="Position"):
                        newData = sensorUsed.translateTrame(self.trameUsed)
                    else :
                        LOGGER.warn("Other Captor (not handle (YET !) )")
                    # Update the frame in the database
                    if newData :
                        sensorUsed.update(newData)
                        LOGGER.info(" Sensor {} ||New data {}".format(sensorUsed.physic_id, sensorUsed.current_state))
            self.trameUsed=''
Example #8
def log_request(record):
    global hpclient
    req = json.dumps(record)
    LOGGER.info(req)

    if hpclient and (record['is_shellshock'] or app.config['hpfeeds.only_exploits'].lower() == 'false'):
        hpclient.publish(app.config['hpfeeds.channel'], req)
Example #9
 def unlink(self, path):
     LOGGER.debug("FSdir unlink %s" % (path))
     file_name = self.get_article_file_name(path)
     if self.files.has_key(file_name):
         self.files.pop(file_name)
         return True # succeeded
     else:
         return False
Example #10
def format_date(date_string):
    result = date_string
    try:
        time_struct = strptime(date_string, "%d/%b/%y")
        result = date.fromtimestamp(mktime(time_struct))
    except (TypeError, ValueError):
        LOGGER.warn('[%s] is not valid date' % date_string)
    return result
Example #11
 def translateTrame(self,inTrame):
     """
     return the temperature (range 0-40 c) from data byte 2 
     """
     rowTemp=int(inTrame.data1,16)
     temperature = round((rowTemp*40/255.0),3)
     LOGGER.info("Temperature sensor {} with temp {}".format(inTrame.ident, temperature))
     return temperature
Example #12
 def utime(self, path, times):
     LOGGER.debug("utime %s %s" % (path, times))
     d = self.get_dir(path)
     
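     # dir(d).count("utime") == 0 means the backend object does not define utime()
     # (roughly equivalent to: not hasattr(d, "utime"))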
     if dir(d).count("utime") == 0:
         return -errno.ENOSYS # Not implemented
     else:
         return d.utime(path, times)
Example #13
 def sendTrame(self,ident,newState):
     with self.lock:
         sensorUsed=sensor.Device.objects(physic_id=ident)[0]
     daTrame=sensorUsed.gimmeTrame(newState)
     if daTrame:
         self.soc.send(daTrame)
         LOGGER.info("Trame sended : {}".format(daTrame))
         return
Example #14
 def updateOne(self,ident):
 	"""
 		Ask for update the sensor with this id
 	"""
 	LOGGER.info("lazily updating {}".format(ident))
 	self.idToUpdate=ident
 	self.newState=''
 	self.save()
Example #15
 def receive (self) :
     # LOGGER.debug("en attente de trame")
     message = self.soc.recv(1024)
     if message and len(message)==28:
         LOGGER.debug("trame reçu : {}".format(message))
         self.trameUsed = Trame.trame(message)
     else :
         return
Example #16
 def sendTrame(self,ident,newState):
 	"""
 		Ask the traductor to send a trame with the new state of a captor
 	"""
 	LOGGER.info("Lazily updating {} with {}".format(ident,newState))
 	self.idToUpdate=ident
 	self.newState=newState
 	self.save()
Example #17
def web_request(program, url):
    LOGGER.info('Performing {} request on {}'.format(program, url))
    data = ''
    try:
        resp = requests.get(url, headers={'User-Agent': USER_AGENTS[program]})
        data = resp.text
    except Exception as e:
        LOGGER.error(e)
    return '{} {}'.format(program, url), data
Example #18
    def open(self, path, flags):
        LOGGER.debug("open %s %d" % (path, flags))

        if not self.files.has_key(path):
            if self.is_valid_file(path):
                buf = self.get_file_buf(path)
                d = self.get_dir(path)
                txt = d.read_file(path)
                buf.write(txt)
Example #19
 def _import_listener(self, name, args):
     listener, source = utils.import_(name, 'listener')
     if not inspect.ismodule(listener):
         listener = listener(*args)
     elif args:
         raise DataError("Listeners implemented as modules do not take arguments")
     LOGGER.info("Imported listener '%s' with arguments %s (source %s)"
                 % (name, utils.seq2str2(args), source))
     return listener
Example #20
    def read(self, path, size, offset):
        LOGGER.debug("read %s %d %d" % (path, size, offset))

        self.open_mode = self.READ

        buf = self.get_file_buf(path)

        buf.seek(offset)
        return buf.read(size)
Example #21
def retro_browse_points(request):

    selected_team_name = 'allteams'
    selected_sprint_number = 'allsprints'
    selected_sticker_type = 'alltypes'
    count = 15

    teams = Team.objects.all().order_by('name')
    sprints = Sprint.objects.all().order_by('-number')
    types = BoardSticker.TYPE_CHOICES

    stickers = BoardSticker.objects.all().order_by('-creation_date')

    try:
        selected_team_name = request.GET["team"]
        selected_sprint_number = request.GET["sprint"]
        selected_sticker_type = request.GET["type"]
        count = request.GET["count"]
    except KeyError:
        LOGGER.warn('Request with incorrect parameters. Using defaults.')

    if selected_team_name != 'allteams':
        selected_team = Team.objects.get(name=selected_team_name)
        board = RetroBoard.objects.filter(team=selected_team)
        stickers = stickers.filter(retroBoard__in=board)

    if selected_sprint_number != 'allsprints':
        selected_sprint = Sprint.objects.get(number=selected_sprint_number)
        boards = RetroBoard.objects.filter(sprint=selected_sprint)
        stickers = stickers.filter(retroBoard__in=boards)

    if selected_sticker_type != 'alltypes':
        stickers = stickers.filter(type=selected_sticker_type)

    stickers = stickers[:count]

    types_dict = {}
    for item in BoardSticker.TYPE_CHOICES:
        types_dict[item[0]] = item[1]

    for point in stickers:
        point.type_str = types_dict[point.type]

    if selected_sprint_number != 'allsprints':
        selected_sprint_number = int(selected_sprint_number)

    return render_to_response('retro/dpq_retro_action_points.html',
                              RequestContext(request, {'stickers': stickers,
                                                       'teams': teams,
                                                       'sprints': sprints,
                                                       'types': types,
                                                       'selected_team': selected_team_name,
                                                       'selected_sprint': selected_sprint_number,
                                                       'selected_type': selected_sticker_type,
                                                       'count': int(count),
                                                       'count_options': [15, 30, 45, 60],
                                                       'active_branches': get_active_branches()}))
Example #22
 def call_method(self, method, *args):
     if self.is_java:
         args = [self._to_map(a) if isinstance(a, dict) else a for a in args]
     try:
         method(*args)
     except:
         message, details = utils.get_error_details()
         LOGGER.error("Calling listener method '%s' of listener '%s' failed: %s"
                  % (method.__name__, self.name, message))
         LOGGER.info("Details:\n%s" % details)
Example #23
 def get_page(self):
     if self.page is None:
         LOGGER.info('Fetching page contents from Confluence')
         data = self.server.getPage(
             self.get_token(),
             self.settings.namespace,
             self.settings.pagename
         )
         self.page = ConfluencePage(data)
     return self.page
Example #24
    def write(self, path, txt, offset):
        LOGGER.debug("write %s [...] %d" % (path, offset))

        self.open_mode = self.WRITE

        buf = self.get_file_buf(path)

        buf.seek(offset)
        buf.write(txt)
        return len(txt)
Example #25
    def mkdir(self, path, mode):
        LOGGER.debug("mkdir %s %x" % (path, mode))
        d = self.get_dir(path)

        if dir(d).count("mkdir") == 0:
            return -errno.EACCES # Permission denied
        else:
            res = d.mkdir(path)
            if res != True:
                return -errno.EACCES # Permission denied
Example #26
 def rmdir(self, path):
     LOGGER.debug("rmdir %s" % path)
     d = self.get_dir(path)
     
     if dir(d).count("rmdir") == 0:
         return -errno.EACCES # Permission denied
     else:
         res = d.rmdir(path)
         if res != True:
             return -errno.EACCES # Permission denied
Example #27
    def _email_config(self):
        try:
            self.mail_server = smtplib.SMTP('smtp.gmail.com', 587)
            self.mail_server.ehlo()
            self.mail_server.starttls()
            self.mail_server.login(self.gmail_user, self.gmail_password)

        except Exception as e:
            LOGGER.info("Failed to connnect. Error: {}".format(e))
            exit()
Example #28
 def close(self, suite):
     stats = Statistics(suite, self._settings['SuiteStatLevel'],
                        self._settings['TagStatInclude'],
                        self._settings['TagStatExclude'],
                        self._settings['TagStatCombine'],
                        self._settings['TagDoc'],
                        self._settings['TagStatLink'])
     stats.serialize(self._xmllogger)
     self._xmllogger.close(serialize_errors=True)
     LOGGER.unregister_logger(self._xmllogger)
     LOGGER.output_file('Output', self._settings['Output'])
Example #29
    def get_issues(self, issues, limit=300):
        result = []
        keys = ','.join(issues)
        request = 'project=%s AND key in (%s)' % (self.settings.project, keys)
        LOGGER.debug(request)
        response = self.proxy.getIssuesFromJqlSearch(self.get_token(), request, Types.intType(limit))

        for item in response:
            issue = JiraIssue()
            issue.parse_raw(item)
            result.append(issue)
        return result
Example #30
    def unlink(self, path):
        LOGGER.debug("unlink %s" % path)
        d = self.get_dir(path)

        self.remove_file_buf(path)

        if self.is_valid_file(path):
            if dir(d).count("unlink") == 0:
                return -errno.EACCES # Permission denied
            else:
                res = d.unlink(path)
                if res != True:
                    return -errno.EACCES # Permission denied
Example #31
async def dashboard(hostname, sar_params, time_range, nested_elem):
    config.read(CFG_PATH)
    api_endpoint = config.get('Grafana','api_url')

    payload = {
        "ts_beg": time_range['grafana_range_begin'],
        "ts_end": time_range['grafana_range_end'],
        "nodename": hostname,
        "modes": sar_params,
        "nested_elem":nested_elem
    }

    LOGGER.debug(api_endpoint)
    LOGGER.debug(payload)

    try:
        res = requests.post(api_endpoint, json=payload)
        if res.status_code == 200:
            LOGGER.debug("status code: %s" % res.status_code)
            LOGGER.debug("content: \n%s" % res.content)
            LOGGER.debug("Dashboard created for -- %s" % hostname);
        else:
            LOGGER.warn("status code: %s" % res.status_code)
            LOGGER.warn("content: \n%s" % res.content)
    
        slug = json.loads(res.text)['slug']
        LOGGER.debug(json.loads(res.text))
        LOGGER.debug(slug)
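    # NOTE: ConnectionError here is presumably requests.exceptions.ConnectionError,
    # raised when the Grafana API endpoint cannot be reached.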
    except ConnectionError:
        LOGGER.error("endpoint not active. Couldn't connect.")
        slug = None
    except Exception as e:
        LOGGER.error(str(e))
        LOGGER.error("unknown error. Couldn't trigger request.")
        slug = None

    return slug
Example #32
    response = weather_api.read()
    response_dictionary = json.loads(response)

    forecast_api = urllib2.urlopen(request_2)
    response_2 = forecast_api.read()
    response_2_dictionary = json.loads(response_2)

except Exception:
    wtr = 'Failed to connect to Open Weather Map.  '
try:
    current = response_dictionary['main']['temp']
    current_low = response_dictionary['main']['temp_min']
    current_high = response_dictionary['main']['temp_max']
    conditions = response_dictionary['weather'][0]['description']
except KeyError:
    LOGGER.error('Unable to read links')
    raise RuntimeError('Unable to read links')

current = str(round(current, 1)).replace('.', ' point ')
current_low = str(round(current_low, 1)).replace('.', ' point ')
current_high = str(round(current_high, 1)).replace('.', ' point ')
todays_low = response_2_dictionary['list'][0]['main']['temp_min']
todays_high = response_2_dictionary['list'][0]['main']['temp_max']

todays_low_str = str(round(todays_low, 1)).replace('.', ' point ')
todays_high_str = str(round(todays_high, 1)).replace('.', ' point ')

LOGGER.info('Max:, {}, Min:, {}'.format(todays_high, todays_low))
wtr = ('Weather conditions for today, ' + conditions +
       ' with a current temperature of ' + current)
frc = (', a low of ' + todays_low_str + ' and a high of  ' + todays_high_str +
Example #33
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)
        y1 = (df.EncodedPixels_1 != "-1").astype("float32").values.reshape(
            -1, 1)
        y2 = (df.EncodedPixels_2 != "-1").astype("float32").values.reshape(
            -1, 1)
        y3 = (df.EncodedPixels_3 != "-1").astype("float32").values.reshape(
            -1, 1)
        y4 = (df.EncodedPixels_4 != "-1").astype("float32").values.reshape(
            -1, 1)
        y = np.concatenate([y1, y2, y3, y4], axis=1)

    with timer('preprocessing'):
        train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID]
        y_train, y_val = y[df.fold_id != FOLD_ID], y[df.fold_id == FOLD_ID]

        train_augmentation = Compose([
            Flip(p=0.5),
            OneOf([
                GridDistortion(p=0.5),
                OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5)
            ],
                  p=0.5),
            OneOf([
                RandomGamma(gamma_limit=(100, 140), p=0.5),
                RandomBrightnessContrast(p=0.5),
                RandomBrightness(p=0.5),
                RandomContrast(p=0.5)
            ],
                  p=0.5),
            OneOf([
                GaussNoise(p=0.5),
                Cutout(num_holes=10, max_h_size=10, max_w_size=20, p=0.5)
            ],
                  p=0.5),
            ShiftScaleRotate(rotate_limit=20, p=0.5),
        ])
        val_augmentation = None

        train_dataset = SeverDataset(train_df,
                                     IMG_DIR,
                                     IMG_SIZE,
                                     N_CLASSES,
                                     id_colname=ID_COLUMNS,
                                     transforms=train_augmentation,
                                     crop_rate=1.0,
                                     class_y=y_train)
        val_dataset = SeverDataset(val_df,
                                   IMG_DIR,
                                   IMG_SIZE,
                                   N_CLASSES,
                                   id_colname=ID_COLUMNS,
                                   transforms=val_augmentation)
        train_sampler = MaskProbSampler(train_df, demand_non_empty_proba=0.6)
        train_loader = DataLoader(train_dataset,
                                  batch_size=BATCH_SIZE,
                                  sampler=train_sampler,
                                  num_workers=8)
        val_loader = DataLoader(val_dataset,
                                batch_size=BATCH_SIZE,
                                shuffle=False,
                                num_workers=8)

        del train_df, val_df, df, train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp.Unet('resnet34',
                         encoder_weights="imagenet",
                         classes=N_CLASSES,
                         encoder_se_module=True,
                         decoder_semodule=True,
                         h_columns=False,
                         skip=True,
                         act="swish",
                         freeze_bn=True,
                         classification=CLASSIFICATION,
                         attention_type="cbam")
        model = convert_model(model)
        if base_model is not None:
            model.load_state_dict(torch.load(base_model))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam([
            {
                'params': model.decoder.parameters(),
                'lr': 3e-3
            },
            {
                'params': model.encoder.parameters(),
                'lr': 3e-4
            },
        ])
        if base_model is None:
            scheduler_cosine = CosineAnnealingLR(optimizer,
                                                 T_max=CLR_CYCLE,
                                                 eta_min=3e-5)
            scheduler = GradualWarmupScheduler(
                optimizer,
                multiplier=1.1,
                total_epoch=CLR_CYCLE * 2,
                after_scheduler=scheduler_cosine)
        else:
            scheduler = CosineAnnealingLR(optimizer,
                                          T_max=CLR_CYCLE,
                                          eta_min=3e-5)

        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level="O1",
                                          verbosity=0)

        if EMA:
            ema_model = copy.deepcopy(model)
            if base_model_ema is not None:
                ema_model.load_state_dict(torch.load(base_model_ema))
            ema_model.to(device)
        else:
            ema_model = None
        model = torch.nn.DataParallel(model)
        if ema_model is not None:  # guard: ema_model is None when EMA is disabled
            ema_model = torch.nn.DataParallel(ema_model)

    with timer('train'):
        train_losses = []
        valid_losses = []

        best_model_loss = 999
        best_model_ema_loss = 999
        best_model_ep = 0
        ema_decay = 0
        checkpoint = base_ckpt + 1

        for epoch in range(102, EPOCHS + 1):
            seed = seed + epoch
            seed_torch(seed)

            if epoch >= EMA_START:
                ema_decay = 0.99

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model,
                                      train_loader,
                                      criterion,
                                      optimizer,
                                      device,
                                      cutmix_prob=0.0,
                                      classification=CLASSIFICATION,
                                      ema_model=ema_model,
                                      ema_decay=ema_decay)
            train_losses.append(tr_loss)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            valid_loss = validate(model,
                                  val_loader,
                                  criterion,
                                  device,
                                  classification=CLASSIFICATION)
            valid_losses.append(valid_loss)
            LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

            if EMA and epoch >= EMA_START:
                ema_valid_loss = validate(ema_model,
                                          val_loader,
                                          criterion,
                                          device,
                                          classification=CLASSIFICATION)
                LOGGER.info('Mean EMA valid loss: {}'.format(
                    round(ema_valid_loss, 5)))

                if ema_valid_loss < best_model_ema_loss:
                    torch.save(
                        ema_model.module.state_dict(),
                        'models/{}_fold{}_ckpt{}_ema.pth'.format(
                            EXP_ID, FOLD_ID, checkpoint))
                    best_model_ema_loss = ema_valid_loss

            scheduler.step()

            if valid_loss < best_model_loss:
                torch.save(
                    model.module.state_dict(),
                    'models/{}_fold{}_ckpt{}.pth'.format(
                        EXP_ID, FOLD_ID, checkpoint))
                best_model_loss = valid_loss
                best_model_ep = epoch
                #np.save("val_pred.npy", val_pred)

            if epoch % (CLR_CYCLE * 2) == CLR_CYCLE * 2 - 1:
                torch.save(
                    model.module.state_dict(),
                    'models/{}_fold{}_latest.pth'.format(EXP_ID, FOLD_ID))
                LOGGER.info('Best valid loss: {} on epoch={}'.format(
                    round(best_model_loss, 5), best_model_ep))
                if EMA:
                    torch.save(
                        ema_model.module.state_dict(),
                        'models/{}_fold{}_latest_ema.pth'.format(
                            EXP_ID, FOLD_ID))
                    LOGGER.info('Best ema valid loss: {}'.format(
                        round(best_model_ema_loss, 5)))
                checkpoint += 1
                best_model_loss = 999

            #del val_pred
            gc.collect()

    LOGGER.info('Best valid loss: {} on epoch={}'.format(
        round(best_model_loss, 5), best_model_ep))

    xs = list(range(1, len(train_losses) + 1))
    plt.plot(xs, train_losses, label='Train loss')
    plt.plot(xs, valid_losses, label='Val loss')
    plt.legend()
    plt.xticks(xs)
    plt.xlabel('Epochs')
    plt.savefig("loss.png")
Example #34
BATCH_SIZE = 32
EPOCHS = 125
FOLD_ID = 1
EXP_ID = "exp55_unet_resnet"
CLASSIFICATION = True
EMA = True
EMA_START = 6
base_ckpt = 17
base_model = None
base_model_ema = None
base_model = "models/{}_fold{}_latest.pth".format(EXP_ID, FOLD_ID)
base_model_ema = "models/{}_fold{}_latest_ema.pth".format(EXP_ID, FOLD_ID)

setup_logger(out_file=LOGGER_PATH)
seed_torch(SEED)
LOGGER.info("seed={}".format(SEED))


@contextmanager
def timer(name):
    t0 = time.time()
    yield
    LOGGER.info('[{}] done in {} s'.format(name, round(time.time() - t0, 2)))


def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)
        y1 = (df.EncodedPixels_1 != "-1").astype("float32").values.reshape(
            -1, 1)
        y2 = (df.EncodedPixels_2 != "-1").astype("float32").values.reshape(
Example #35
def main():
    with timer('load data'):
        df = pd.read_csv(TRAIN_PATH)
        df = df[df.Image != "ID_6431af929"].reset_index(drop=True)
        df.loc[df.pre_SOPInstanceUID == "ID_6431af929",
               "pre1_SOPInstanceUID"] = df.loc[df.pre_SOPInstanceUID ==
                                               "ID_6431af929", "Image"]
        df.loc[df.post_SOPInstanceUID == "ID_6431af929",
               "post1_SOPInstanceUID"] = df.loc[df.post_SOPInstanceUID ==
                                                "ID_6431af929", "Image"]
        df.loc[df.prepre_SOPInstanceUID == "ID_6431af929",
               "pre2_SOPInstanceUID"] = df.loc[df.prepre_SOPInstanceUID ==
                                               "ID_6431af929",
                                               "pre1_SOPInstanceUID"]
        df.loc[df.postpost_SOPInstanceUID == "ID_6431af929",
               "post2_SOPInstanceUID"] = df.loc[df.postpost_SOPInstanceUID ==
                                                "ID_6431af929",
                                                "post1_SOPInstanceUID"]
        y = df[TARGET_COLUMNS].values
        df = df[[
            "Image", "pre1_SOPInstanceUID", "post1_SOPInstanceUID",
            "pre2_SOPInstanceUID", "post2_SOPInstanceUID"
        ]]
        gc.collect()

    with timer('preprocessing'):
        train_augmentation = Compose([
            CenterCrop(512 - 50, 512 - 50, p=1.0),
            HorizontalFlip(p=0.5),
            OneOf([
                ElasticTransform(p=0.5,
                                 alpha=120,
                                 sigma=120 * 0.05,
                                 alpha_affine=120 * 0.03),
                GridDistortion(p=0.5),
                OpticalDistortion(p=1, distort_limit=2, shift_limit=0.5)
            ],
                  p=0.5),
            Rotate(limit=30, border_mode=0, p=0.7),
            Resize(img_size, img_size, p=1)
        ])

        train_dataset = RSNADataset(df,
                                    y,
                                    img_size,
                                    IMAGE_PATH,
                                    id_colname=ID_COLUMNS,
                                    transforms=train_augmentation,
                                    black_crop=False,
                                    three_window=True,
                                    rescaling=False,
                                    pick_type="post_post")
        train_loader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=8,
                                  pin_memory=True)
        del df, train_dataset
        gc.collect()

    with timer('create model'):
        model = CnnModel(num_classes=N_CLASSES,
                         encoder="se_resnext50_32x4d",
                         pretrained="imagenet",
                         pool_type="avg")
        if model_path is not None:
            model.load_state_dict(torch.load(model_path))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss(
            weight=torch.FloatTensor([2, 1, 1, 1, 1, 1]).cuda())
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, eps=1e-4)
        model = torch.nn.DataParallel(model)

    with timer('train'):
        for epoch in range(1, epochs + 1):
            if epoch == 5:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = param_group['lr'] * 0.1
            seed_torch(SEED + epoch)

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model, train_loader, criterion,
                                      optimizer, device)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            torch.save(model.module.state_dict(),
                       'models/{}_ep{}.pth'.format(EXP_ID, epoch))
Example #36
 def setup_queue(self, queue_name):
     LOGGER.info('Declaring queue %s', queue_name)
     cb = functools.partial(self.on_queue_declareok, userdata=queue_name)
     self._channel.queue_declare(queue=queue_name, callback=cb)
Example #37
    def start(self):
        if len(self.players) != Othello.player_num:
            LOGGER.error(u"invalid player num:{}, expected:{}".format(
                len(self.players), Othello.player_num))
            return
        LOGGER.info(u"Othello game started!")
        LOGGER.info(u"choosing player...")

        offset = random.randint(0, Othello.player_num - 1)

        for idx, p in enumerate(self.piece_pool):
            tmp_player = self.players[(idx + offset) % len(self.players)]
            # self.piece_dict[p] = tmp_player
            tmp_player.set_piece(p)

        self.round = 0
        pass_time = 0
        LOGGER.info(u"start moving...")
        last_player = None

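        # main game loop: ends when pass_time reaches the number of players,
        # i.e. a full round in which nobody could make a valid move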
        while True:
            if pass_time == len(self.players):
                LOGGER.info(u"all players has no way to go, game is finished!")
                break
            cur_player = self.players[(self.round + offset) %
                                      len(self.players)]
            cur_piece = cur_player.piece
            if last_player:
                last_player.notify_status(self.board, cur_piece)

            self.round += 1
            LOGGER.debug(u"current board")
            LOGGER.debug(board2str(self.board))
            valid_points = get_valid_points(self.board, cur_piece)
            LOGGER.debug(u"valid points:{}".format(valid_points))
            LOGGER.info(u"round{} player[{}] putting {} ".format(
                self.round, cur_player, cur_player.piece))

            if not valid_points:
                valid_points.append((None, None))
            action = cur_player.play(valid_points, self.board)
            last_player = cur_player
            if action not in valid_points:
                LOGGER.error(
                    "invalid action :{}, pass this round".format(action))
                pass_time += 1
                continue
            if action == (None, None):
                LOGGER.info(u"player[{}] has no way to go".format(cur_player))
                pass_time += 1
                continue

            LOGGER.info(u"player[{}] put {} to point {}".format(
                cur_player, cur_piece, action))
            self.put_piece(action[0], action[1], cur_piece)
            pass_time = 0

        score = get_score(self.board)
        if score > 0:
            win_piece = BLACK
        elif score < 0:
            win_piece = WHITE
        else:
            win_piece = None
            LOGGER.info(u"THIS IS A DRAW GAME!")

        winner = None
        if win_piece:
            for player in self.players:
                LOGGER.debug(u"notifying player[{}] of reward".format(player))
                player.notify_win(win_piece)
                if player.piece == win_piece:
                    LOGGER.info(
                        u"piece:{}[player:{}] WIN! WITH SCORE={}".format(
                            win_piece, player, abs(score)))
                    winner = player

        LOGGER.info(u"game finish!")
        return winner
Example #38
@contextmanager
def timer(name):
    t0 = time.time()
    yield
    LOGGER.info(f'[{name}] done in {time.time() - t0:.0f} s')
Example #39
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)
        if N_CLASSES == 3:
            df.drop("EncodedPixels_2", axis=1, inplace=True)
            df = df.rename(columns={"EncodedPixels_3": "EncodedPixels_2"})
            df = df.rename(columns={"EncodedPixels_4": "EncodedPixels_3"})

    with timer('preprocessing'):
        val_df = df[df.fold_id == FOLD_ID]

        val_augmentation = None
        val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES, id_colname=ID_COLUMNS,
                                  transforms=val_augmentation)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8)

        del val_df, df, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp_old.Unet('resnet34', encoder_weights="imagenet", classes=N_CLASSES, encoder_se_module=True,
                         decoder_semodule=True, h_columns=False, skip=True, act="swish", freeze_bn=True,
                         classification=CLASSIFICATION)
        model.load_state_dict(torch.load(base_model))
        model.to(device)
        model.eval()

        criterion = torch.nn.BCEWithLogitsLoss()

    with timer('predict'):
        valid_loss, y_pred, y_true, cls = predict(model, val_loader, criterion, device, classification=CLASSIFICATION)
        LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

        scores = []
        all_scores = []
        min_sizes = [300, 0, 600, 1600]
        for i in range(N_CLASSES):
            if i == 1:
                continue
            best = 0
            count = 0
            min_size = min_sizes[i]
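            # sweep thresholds 0.70..0.99 in 0.01 steps; stop early after three
            # consecutive thresholds without an improvement in mean dice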
            for th in [0.7+i*0.01 for i in range(30)]:
                val_preds_ = copy.deepcopy(y_pred[:, i, :, :])
                scores_ = []
                all_scores_ = []
                for y_val_, y_pred_ in zip(y_true[:, i, :, :], val_preds_):
                    y_pred_ = post_process(y_pred_ > 0.5, y_pred_, min_size, th)
                    score = dice(y_val_, y_pred_)
                    if np.isnan(score):
                        scores_.append(1)
                    else:
                        scores_.append(score)
                LOGGER.info('dice={} on {}'.format(np.mean(scores_), th))
                if np.mean(scores_) >= best:
                    best = np.mean(scores_)
                    count = 0
                else:
                    count += 1
                if count == 3:
                    break
            scores.append(best)
            all_scores.append(all_scores_)

        LOGGER.info('holdout dice={}'.format(np.mean(scores)))
Example #40
def main():
    with timer('load data'):
        df = pd.read_csv(TRAIN_PATH)
        df["loc_x"] = df["loc_x"] / 100
        df["loc_y"] = df["loc_y"] / 100
        y = df[TARGET_COLUMNS].values
        df = df[[ID_COLUMNS]]
        gc.collect()

    with timer("split data"):
        if y.shape[1] == 1:
            folds = StratifiedKFold(n_splits=5, shuffle=True,
                                    random_state=0).split(df, y)
        else:
            folds = StratifiedKFold(n_splits=5, shuffle=True,
                                    random_state=0).split(df, y[:, 0])
        for n_fold, (train_index, val_index) in enumerate(folds):
            train_df = df.loc[train_index]
            val_df = df.loc[val_index]
            y_train = y[train_index]
            y_val = y[val_index]
            if n_fold == fold_id:
                break

    with timer('preprocessing'):
        train_augmentation = Compose([
            Flip(p=0.5),
            OneOf([
                ElasticTransform(p=0.5,
                                 alpha=120,
                                 sigma=120 * 0.05,
                                 alpha_affine=120 * 0.03),
                GridDistortion(p=0.5),
                OpticalDistortion(p=1, distort_limit=2, shift_limit=0.5)
            ],
                  p=0.5),
            RandomBrightnessContrast(p=0.5),
            Blur(blur_limit=8, p=0.5),
            ShiftScaleRotate(rotate_limit=20, p=0.5),
            Resize(img_size, img_size, p=1)
        ])
        val_augmentation = Compose([Resize(img_size, img_size, p=1)])

        train_dataset = KDDataset(train_df,
                                  y_train,
                                  img_size,
                                  IMAGE_PATH,
                                  id_colname=ID_COLUMNS,
                                  transforms=train_augmentation)
        train_loader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=2,
                                  pin_memory=True)

        val_dataset = KDDataset(val_df,
                                y_val,
                                img_size,
                                IMAGE_PATH,
                                id_colname=ID_COLUMNS,
                                transforms=val_augmentation)
        val_loader = DataLoader(val_dataset,
                                batch_size=batch_size,
                                shuffle=False,
                                num_workers=2,
                                pin_memory=True)
        del df, train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        model = Efficient(num_classes=N_CLASSES,
                          encoder="efficientnet-b3",
                          pool_type="avg")
        if model_path is not None:
            model.load_state_dict(torch.load(model_path))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, eps=1e-4)

        # model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)

    with timer('train'):
        best_score = 0
        best_epoch = 0
        for epoch in range(1, epochs + 1):
            seed_torch(SEED + epoch)

            if epoch == epochs - 3:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = param_group['lr'] * 0.1

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model,
                                      train_loader,
                                      criterion,
                                      optimizer,
                                      device,
                                      N_CLASSES,
                                      cutmix_prob=0.3)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            y_pred, target, val_loss = validate(model, val_loader, criterion,
                                                device, N_CLASSES)
            score = roc_auc_score(target, y_pred)
            LOGGER.info('Mean val loss: {}'.format(round(val_loss, 5)))
            LOGGER.info('val score: {}'.format(round(score, 5)))

            if score > best_score:
                best_score = score
                best_epoch = epoch
                np.save("y_pred.npy", y_pred)
                torch.save(model.state_dict(), save_path)

        np.save("target.npy", target)
        LOGGER.info('best score: {} on epoch: {}'.format(
            round(best_score, 5), best_epoch))

    with timer('predict'):
        test_df = pd.read_csv(TEST_PATH)
        test_ids = test_df["id"].values

        test_augmentation = Compose([Resize(img_size, img_size, p=1)])
        test_dataset = KDDatasetTest(test_df,
                                     img_size,
                                     TEST_IMAGE_PATH,
                                     id_colname=ID_COLUMNS,
                                     transforms=test_augmentation,
                                     n_tta=2)
        test_loader = DataLoader(test_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=2,
                                 pin_memory=True)

        model.load_state_dict(torch.load(save_path))

        pred = predict(model, test_loader, device, N_CLASSES, n_tta=2)
        print(pred.shape)
        results = pd.DataFrame({"id": test_ids, "is_star": pred.reshape(-1)})

        results.to_csv("results.csv", index=False)
Example #41
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)

    with timer('preprocessing'):
        train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID]

        train_augmentation = Compose([
            Flip(p=0.5),
            OneOf([
                GridDistortion(p=0.5),
                OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5)
            ],
                  p=0.5),
            OneOf([
                RandomGamma(gamma_limit=(100, 140), p=0.5),
                RandomBrightnessContrast(p=0.5),
                RandomBrightness(p=0.5),
                RandomContrast(p=0.5)
            ],
                  p=0.5),
            OneOf([
                GaussNoise(p=0.5),
                Cutout(num_holes=10, max_h_size=10, max_w_size=20, p=0.5)
            ],
                  p=0.5)
        ])
        val_augmentation = None

        train_dataset = SeverDataset(train_df,
                                     IMG_DIR,
                                     IMG_SIZE,
                                     N_CLASSES,
                                     id_colname=ID_COLUMNS,
                                     transforms=train_augmentation,
                                     crop_rate=1.0)
        val_dataset = SeverDataset(val_df,
                                   IMG_DIR,
                                   IMG_SIZE,
                                   N_CLASSES,
                                   id_colname=ID_COLUMNS,
                                   transforms=val_augmentation)
        train_loader = DataLoader(train_dataset,
                                  batch_size=BATCH_SIZE,
                                  shuffle=True,
                                  num_workers=8)
        val_loader = DataLoader(val_dataset,
                                batch_size=BATCH_SIZE,
                                shuffle=False,
                                num_workers=8)

        del train_df, val_df, df, train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp.Unet('se_resnext50_32x4d',
                         encoder_weights="imagenet",
                         classes=N_CLASSES,
                         encoder_se_module=True,
                         decoder_semodule=True,
                         h_columns=False,
                         skip=True)
        if base_model is not None:
            model.load_state_dict(torch.load(base_model))
        model.to(device)

        criterion = ComboLoss({
            'bce': 1,
            'dice': 1,
            'focal': 1
        },
                              channel_weights=[1, 1, 1, 1])
        optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
        if base_model is None:
            scheduler_cosine = CosineAnnealingLR(optimizer,
                                                 T_max=CLR_CYCLE,
                                                 eta_min=3e-5)
            scheduler = GradualWarmupScheduler(
                optimizer,
                multiplier=1.1,
                total_epoch=CLR_CYCLE * 2,
                after_scheduler=scheduler_cosine)
        else:
            scheduler = CosineAnnealingLR(optimizer,
                                          T_max=CLR_CYCLE,
                                          eta_min=3e-5)

        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level="O1",
                                          verbosity=0)
        model = torch.nn.DataParallel(model)

    with timer('train'):
        train_losses = []
        valid_losses = []

        best_model_loss = 999
        best_model_ep = 0
        checkpoint = base_ckpt + 1

        for epoch in range(1, EPOCHS + 1):
            seed = seed + epoch
            seed_torch(seed)
            if epoch % (CLR_CYCLE * 2) == 0:
                LOGGER.info('Best valid loss: {} on epoch={}'.format(
                    round(best_model_loss, 5), best_model_ep))
                checkpoint += 1
                best_model_loss = 999

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model,
                                      train_loader,
                                      criterion,
                                      optimizer,
                                      device,
                                      cutmix_prob=0.0)
            train_losses.append(tr_loss)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            valid_loss = validate(model, val_loader, criterion, device)
            valid_losses.append(valid_loss)
            LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

            scheduler.step()

            if valid_loss < best_model_loss:
                torch.save(
                    model.module.state_dict(),
                    'models/{}_fold{}_ckpt{}.pth'.format(
                        EXP_ID, FOLD_ID, checkpoint))
                best_model_loss = valid_loss
                best_model_ep = epoch
                #np.save("val_pred.npy", val_pred)

            #del val_pred
            gc.collect()

    LOGGER.info('Best valid loss: {} on epoch={}'.format(
        round(best_model_loss, 5), best_model_ep))

    xs = list(range(1, len(train_losses) + 1))
    plt.plot(xs, train_losses, label='Train loss')
    plt.plot(xs, valid_losses, label='Val loss')
    plt.legend()
    plt.xticks(xs)
    plt.xlabel('Epochs')
    plt.savefig("loss.png")
Example #42
 def log_state():
     LOGGER.info(STATE)
Example #43
 def send(self, data):
     LOGGER.debug("Client send data: %s" % data)
     send_data = self.wrap_data(data)
     self.tx_tmp += len(send_data)
     self.sock.sendto(send_data, self.server_addr)
Example #44
 def __exit__(self, exc_type, exc_value, exc_traceback):
     """
     Context manager exit/destructor
     """
     LOGGER.debug("DB object context exit")
     self.end()
Example #45
 def run(self):
     LOGGER.debug("Client run")
     self.running = True
     self.handshake_thread = threading.Thread(target=self.handle_handshake)
     self.handshake_thread.start()
Example #46
 def run(self, command_line):
     log.debug(f"RUN: {command_line}")
     return subprocess.call(command_line, shell=True)
Example #47
def main():
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)

    with timer('preprocessing'):
        train_df, val_df = df[df.fold_id != FOLD_ID], df[df.fold_id == FOLD_ID]

        train_augmentation = Compose([
            Flip(p=0.5),
            OneOf(
                [
                    #ElasticTransform(p=0.5, alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
                    GridDistortion(p=0.5),
                    OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5)
                ],
                p=0.5),
            #OneOf([
            #    ShiftScaleRotate(p=0.5),
            ##    RandomRotate90(p=0.5),
            #    Rotate(p=0.5)
            #], p=0.5),
            OneOf([
                Blur(blur_limit=8, p=0.5),
                MotionBlur(blur_limit=8, p=0.5),
                MedianBlur(blur_limit=8, p=0.5),
                GaussianBlur(blur_limit=8, p=0.5)
            ],
                  p=0.5),
            OneOf(
                [
                    #CLAHE(clip_limit=4, tile_grid_size=(4, 4), p=0.5),
                    RandomGamma(gamma_limit=(100, 140), p=0.5),
                    RandomBrightnessContrast(p=0.5),
                    RandomBrightness(p=0.5),
                    RandomContrast(p=0.5)
                ],
                p=0.5),
            OneOf([
                GaussNoise(p=0.5),
                Cutout(num_holes=10, max_h_size=10, max_w_size=20, p=0.5)
            ],
                  p=0.5)
        ])
        val_augmentation = None

        train_dataset = SeverDataset(train_df,
                                     IMG_DIR,
                                     IMG_SIZE,
                                     N_CLASSES,
                                     id_colname=ID_COLUMNS,
                                     transforms=train_augmentation)
        val_dataset = SeverDataset(val_df,
                                   IMG_DIR,
                                   IMG_SIZE,
                                   N_CLASSES,
                                   id_colname=ID_COLUMNS,
                                   transforms=val_augmentation)
        train_loader = DataLoader(train_dataset,
                                  batch_size=BATCH_SIZE,
                                  shuffle=True,
                                  num_workers=2)
        val_loader = DataLoader(val_dataset,
                                batch_size=BATCH_SIZE,
                                shuffle=False,
                                num_workers=2)

        del train_df, val_df, df, train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp.UnetPP('se_resnext50_32x4d',
                           encoder_weights='imagenet',
                           classes=N_CLASSES,
                           encoder_se_module=True,
                           decoder_semodule=True,
                           h_columns=False,
                           deep_supervision=True)
        model.load_state_dict(torch.load(model_path))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
        scheduler = CosineAnnealingLR(optimizer, T_max=CLR_CYCLE, eta_min=3e-5)
        #scheduler = GradualWarmupScheduler(optimizer, multiplier=1.1, total_epoch=CLR_CYCLE*2, after_scheduler=scheduler_cosine)

        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level="O1",
                                          verbosity=0)

    with timer('train'):
        train_losses = []
        valid_losses = []

        best_model_loss = 999
        best_model_ep = 0
        checkpoint = 0

        for epoch in range(1, EPOCHS + 1):
            if epoch % (CLR_CYCLE * 2) == 0:
                if epoch != 0:
                    y_val = y_val.reshape(-1, N_CLASSES, IMG_SIZE[0],
                                          IMG_SIZE[1])
                    best_pred = best_pred.reshape(-1, N_CLASSES, IMG_SIZE[0],
                                                  IMG_SIZE[1])
                    for i in range(N_CLASSES):
                        th, score, _, _ = search_threshold(
                            y_val[:, i, :, :], best_pred[:, i, :, :])
                        LOGGER.info(
                            'Best loss: {} Best Dice: {} on epoch {} th {} class {}'
                            .format(round(best_model_loss, 5), round(score, 5),
                                    best_model_ep, th, i))
                checkpoint += 1
                best_model_loss = 999

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch_dsv(model, train_loader, criterion,
                                          optimizer, device)
            train_losses.append(tr_loss)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            valid_loss, val_pred, y_val = validate_dsv(model, val_loader,
                                                       criterion, device)
            valid_losses.append(valid_loss)
            LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

            scheduler.step()

            if valid_loss < best_model_loss:
                torch.save(
                    model.state_dict(),
                    '{}_fold{}_ckpt{}.pth'.format(EXP_ID, FOLD_ID, checkpoint))
                best_model_loss = valid_loss
                best_model_ep = epoch
                best_pred = val_pred

            del val_pred
            gc.collect()

    with timer('eval'):
        y_val = y_val.reshape(-1, N_CLASSES, IMG_SIZE[0], IMG_SIZE[1])
        best_pred = best_pred.reshape(-1, N_CLASSES, IMG_SIZE[0], IMG_SIZE[1])
        for i in range(N_CLASSES):
            th, score, _, _ = search_threshold(y_val[:, i, :, :],
                                               best_pred[:, i, :, :])
            LOGGER.info(
                'Best loss: {} Best Dice: {} on epoch {} th {} class {}'.
                format(round(best_model_loss, 5), round(score, 5),
                       best_model_ep, th, i))

    xs = list(range(1, len(train_losses) + 1))
    plt.plot(xs, train_losses, label='Train loss')
    plt.plot(xs, valid_losses, label='Val loss')
    plt.legend()
    plt.xticks(xs)
    plt.xlabel('Epochs')
    plt.savefig("loss.png")
Example #48
 def __enter__(self):
     """
     Context manager enter/constructor
     """
     LOGGER.debug("DB object context enter")
     return self
Example #49
def main():
    train_df = pd.read_csv(TRAIN_PATH).sample(train_size+valid_size, random_state=seed)

    y = np.where(train_df['target'] >= 0.5, 1, 0)
    y_aux = train_df[AUX_COLUMNS].values

    identity_columns_new = []
    for column in identity_columns + ['target']:
        train_df[column + "_bin"] = np.where(train_df[column] >= 0.5, True, False)
        if column != "target":
            identity_columns_new.append(column + "_bin")

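    # per-example sample weights built from the target and identity columns (appears to
    # follow the Jigsaw unintended-bias weighting scheme); loss_weight is the reciprocal
    # of the mean sample weight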
    weights = np.ones((len(train_df),)) / 4
    weights += (train_df[identity_columns].fillna(0).values >= 0.5).sum(axis=1).astype(bool).astype(np.int) / 4
    weights += (((train_df["target"].values >= 0.5).astype(bool).astype(np.int) +
                 (train_df[identity_columns].fillna(0).values < 0.5).sum(axis=1).astype(bool).astype(np.int)) > 1).astype(
        bool).astype(np.int) / 4
    weights += (((train_df["target"].values < 0.5).astype(bool).astype(np.int) +
                 (train_df[identity_columns].fillna(0).values >= 0.5).sum(axis=1).astype(bool).astype(np.int)) > 1).astype(
        bool).astype(np.int) / 4
    loss_weight = 1.0 / weights.mean()

    with timer('preprocessing text'):
        #df["comment_text"] = [analyzer_embed(text) for text in df["comment_text"]]
        train_df['comment_text'] = train_df['comment_text'].astype(str)
        train_df = train_df.fillna(0)

    with timer('load embedding'):
        tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH, cache_dir=None, do_lower_case=True)
        X_text, train_lengths = convert_lines(train_df["comment_text"].fillna("DUMMY_VALUE"), max_len, tokenizer)

    test_df = train_df[train_size:]

    with timer('train'):
        X_train, y_train, y_aux_train, w_train = (X_text[:train_size], y[:train_size],
                                                  y_aux[:train_size], weights[:train_size])
        X_val, y_val, y_aux_val, w_val = (X_text[train_size:], y[train_size:],
                                          y_aux[train_size:], weights[train_size:])
        trn_lengths, val_lengths = train_lengths[:train_size], train_lengths[train_size:]
        model = BertForSequenceClassification.from_pretrained(WORK_DIR, cache_dir=None, num_labels=n_labels)
        model.zero_grad()
        model = model.to(device)

        y_train = np.concatenate((y_train.reshape(-1, 1), w_train.reshape(-1, 1), y_aux_train), axis=1)
        y_val = np.concatenate((y_val.reshape(-1, 1), w_val.reshape(-1, 1), y_aux_val), axis=1)

        train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.long),
                                                       torch.tensor(y_train, dtype=torch.float))
        valid = torch.utils.data.TensorDataset(torch.tensor(X_val, dtype=torch.long),
                                               torch.tensor(y_val, dtype=torch.float))
        ran_sampler = torch.utils.data.RandomSampler(train_dataset)
        len_sampler = LenMatchBatchSampler(ran_sampler, batch_size=batch_size, drop_last=False)
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_sampler=len_sampler)
        valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size * 2, shuffle=False)

        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]

        num_train_optimization_steps = int(epochs * train_size / batch_size / accumulation_steps)
        total_step = int(epochs * train_size / batch_size)

        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=2e-5,
                             warmup=0.05,
                             t_total=num_train_optimization_steps)

        model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)
        #criterion = torch.nn.BCEWithLogitsLoss().to(device)
        criterion = CustomLoss(loss_weight).to(device)

        LOGGER.info(f"Starting 1 epoch...")
        tr_loss, train_losses = train_one_epoch(model, train_loader, criterion, optimizer, device,
                                                accumulation_steps, total_step, n_labels)
        LOGGER.info(f'Mean train loss: {round(tr_loss,5)}')

        torch.save(model.state_dict(), '{}_dic'.format(exp))

        valid_loss, oof_pred = validate(model, valid_loader, criterion, device, n_labels)
        del model
        gc.collect()
        torch.cuda.empty_cache()

    test_df["pred"] = oof_pred[:, 0]
    test_df = convert_dataframe_to_bool(test_df)
    bias_metrics_df = compute_bias_metrics_for_model(test_df, identity_columns)
    LOGGER.info(bias_metrics_df)

    score = get_final_metric(bias_metrics_df, calculate_overall_auc(test_df))
    LOGGER.info(f'final score is {score}')

    test_df.to_csv("oof.csv", index=False)

    xs = list(range(1, len(train_losses) + 1))
    plt.plot(xs, train_losses, label='Train loss')
    plt.legend()
    plt.xticks(xs)
    plt.xlabel('Iter')
    plt.savefig("loss.png")
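
LenMatchBatchSampler is not defined in this snippet. A minimal sketch, assuming it buckets samples by non-padding token length so each batch holds similarly sized sequences (the 64-token bucket width is an arbitrary choice for illustration):

import torch

class LenMatchBatchSampler(torch.utils.data.BatchSampler):
    def __iter__(self):
        buckets = {}
        for idx in self.sampler:
            seq = self.sampler.data_source[idx][0]          # token ids from the TensorDataset
            bucket_id = int(seq.ne(0).sum().item()) // 64   # length bucket, 64 tokens wide
            buckets.setdefault(bucket_id, []).append(idx)
            if len(buckets[bucket_id]) == self.batch_size:
                yield buckets.pop(bucket_id)
        # flush whatever is left over, respecting drop_last
        leftovers = [idx for bucket in buckets.values() for idx in bucket]
        for start in range(0, len(leftovers), self.batch_size):
            batch = leftovers[start:start + self.batch_size]
            if len(batch) == self.batch_size or not self.drop_last:
                yield batch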
Example #50
def main():
    with timer('load data'):
        df = pd.read_csv(TRAIN_PATH)[:10]
        df = df[df.Image != "ID_6431af929"].reset_index(drop=True)
        df.loc[df.pre_SOPInstanceUID == "ID_6431af929",
               "pre1_SOPInstanceUID"] = df.loc[df.pre_SOPInstanceUID ==
                                               "ID_6431af929", "Image"]
        df.loc[df.post_SOPInstanceUID == "ID_6431af929",
               "post1_SOPInstanceUID"] = df.loc[df.post_SOPInstanceUID ==
                                                "ID_6431af929", "Image"]
        df.loc[df.prepre_SOPInstanceUID == "ID_6431af929",
               "pre2_SOPInstanceUID"] = df.loc[df.prepre_SOPInstanceUID ==
                                               "ID_6431af929",
                                               "pre1_SOPInstanceUID"]
        df.loc[df.postpost_SOPInstanceUID == "ID_6431af929",
               "post2_SOPInstanceUID"] = df.loc[df.postpost_SOPInstanceUID ==
                                                "ID_6431af929",
                                                "post1_SOPInstanceUID"]
        df = df[[
            "Image", "pre1_SOPInstanceUID", "post1_SOPInstanceUID",
            "pre2_SOPInstanceUID", "post2_SOPInstanceUID"
        ]]
        ids = df["Image"].values
        gc.collect()

    with timer('preprocessing'):
        test_augmentation = Compose([
            CenterCrop(512 - 50, 512 - 50, p=1.0),
            Resize(img_size, img_size, p=1)
        ])

        test_dataset = RSNADatasetTest(df,
                                       img_size,
                                       IMAGE_PATH,
                                       id_colname=ID_COLUMNS,
                                       transforms=test_augmentation,
                                       black_crop=False,
                                       three_window=True,
                                       rescaling=False,
                                       pick_type="post_post",
                                       n_tta=N_TTA)
        test_loader = DataLoader(test_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=16,
                                 pin_memory=True)
        del df, test_dataset
        gc.collect()

    with timer('create model'):
        model = CnnModel(num_classes=N_CLASSES,
                         encoder="se_resnext50_32x4d",
                         pretrained="imagenet",
                         pool_type="avg")
        model.load_state_dict(torch.load(model_path))
        model.to(device)
        model = torch.nn.DataParallel(model)

    with timer('predict'):
        pred = predict(model, test_loader, device, n_tta=N_TTA)
        pred = np.clip(pred, 1e-6, 1 - 1e-6)

    with timer('sub'):
        sub = pd.DataFrame(pred, columns=TARGET_COLUMNS)
        sub["ID"] = ids
        sub = sub.set_index("ID")
        sub = sub.unstack().reset_index()
        sub["ID"] = sub["ID"] + "_" + sub["level_0"]
        sub = sub.rename(columns={0: "Label"})
        sub = sub.drop("level_0", axis=1)
        LOGGER.info(sub.head())
        sub.to_csv("../output/{}_train.csv".format(EXP_ID), index=False)

from contextlib import contextmanager

@contextmanager
def timer(name):
    # timing helper used above as `with timer('...'):`
    t0 = time.time()
    yield
    LOGGER.info('[{}] done in {} s'.format(name, round(time.time() - t0, 2)))
Example #52
def train_lgbm(X_train, y_train, X_valid, y_valid, X_test, categorical_features, feature_name,
               fold_id, lgb_params, fit_params, model_name, loss_func, rank=False, calc_importances=True):
    train = lgb.Dataset(X_train, y_train,
                        categorical_feature=categorical_features,
                        feature_name=feature_name)
    if X_valid is not None:
        valid = lgb.Dataset(X_valid, y_valid,
                            categorical_feature=categorical_features,
                            feature_name=feature_name)
    evals_result = {}
    if X_valid is not None:
        model = lgb.train(
            lgb_params,
            train,
            valid_sets=[valid],
            valid_names=['valid'],
            evals_result=evals_result,
            **fit_params
        )
    else:
        model = lgb.train(
            lgb_params,
            train,
            evals_result=evals_result,
            **fit_params
        )
    LOGGER.info(f'Best Iteration: {model.best_iteration}')

    # train score
    if X_valid is None:
        y_pred_train = model.predict(X_train, num_iteration=fit_params["num_boost_round"])
        y_pred_train[y_pred_train<0] = 0
        train_loss = loss_func(y_train, y_pred_train)
    else:
        y_pred_train = model.predict(X_train, num_iteration=model.best_iteration)
        y_pred_train[y_pred_train < 0] = 0
        train_loss = loss_func(y_train, y_pred_train)

    if X_valid is not None:
        # validation score
        y_pred_valid = model.predict(X_valid)
        y_pred_valid[y_pred_valid < 0] = 0
        valid_loss = loss_func(y_valid, y_pred_valid)
        # save prediction
        #np.save(f'{model_name}_train_fold{fold_id}.npy', y_pred_valid)
    else:
        y_pred_valid = None
        valid_loss = None

    # save model
    """要編集"""
    model.save_model(os.path.join(f'../output/{model_name}', f'{model_name}_fold{fold_id}.txt'))

    if X_test is not None:
        # predict test
        y_pred_test = model.predict(X_test)
        y_pred_test[y_pred_test < 0] = 0
        # save prediction
        #np.save(f'{model_name}_test_fold{fold_id}.npy', y_pred_test)
    else:
        y_pred_test = None

    if calc_importances:
        importances = pd.DataFrame()
        importances['feature'] = feature_name
        importances['gain'] = model.feature_importance(importance_type='gain')
        importances['split'] = model.feature_importance(importance_type='split')
        importances['fold'] = fold_id
    else:
        importances = None

    return y_pred_valid, y_pred_test, train_loss, valid_loss, importances, model.best_iteration
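
For context, a call to this helper might look like the following; the synthetic data, parameter values, and output directory are purely illustrative, and the fit_params assume a LightGBM version in which lgb.train still accepts early_stopping_rounds and verbose_eval directly:

import os
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

rng = np.random.RandomState(0)
X = pd.DataFrame({"f0": rng.rand(200), "f1": rng.rand(200)})
y = X["f0"] * 2 + rng.rand(200) * 0.1
X_tr, X_va, y_tr, y_va = X[:150], X[150:], y[:150], y[150:]

os.makedirs("../output/lgb_demo", exist_ok=True)  # save_model above writes into this folder
lgb_params = {"objective": "regression", "metric": "rmse", "learning_rate": 0.1, "num_leaves": 15}
fit_params = {"num_boost_round": 200, "early_stopping_rounds": 20, "verbose_eval": 50}

preds_va, preds_te, tr_loss, va_loss, imp, best_iter = train_lgbm(
    X_tr, y_tr, X_va, y_va, None, categorical_features=[], feature_name=list(X.columns),
    fold_id=0, lgb_params=lgb_params, fit_params=fit_params, model_name="lgb_demo",
    loss_func=lambda a, b: mean_squared_error(a, b) ** 0.5)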
Example #53
import json
from util import get_hpfeeds_client, get_ext_ip
from commands import perform_commands
import re
import datetime
import urlparse
import os
import bottle  # needed for bottle.default_app() below

shellshock_re = re.compile(r'\(\s*\)\s*{')

# this is the default apache page
with open(os.path.join(os.path.dirname(__file__), 'template.html')) as f:
    page_template = f.read()

app = bottle.default_app()
LOGGER.info('Loading config file shockpot.conf ...')
app.config.load_config(os.path.join(os.path.dirname(__file__),
                                    'shockpot.conf'))
hpclient = get_hpfeeds_client(app.config)

public_ip = None
if app.config['fetch_public_ip.enabled'].lower() == 'true':
    public_ip = get_ext_ip(json.loads(app.config['fetch_public_ip.urls']))
    print 'public_ip =', public_ip


def is_shellshock(headers):
    for name, value in headers:
        if shellshock_re.search(value):
            return True
    return False
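
A quick illustration of the check with a typical Shellshock probe header (the header values are made up for the example):

probe_headers = [('User-Agent', "() { :; }; /bin/bash -c 'cat /etc/passwd'"),
                 ('Accept', '*/*')]
print(is_shellshock(probe_headers))   # True: the regex matches the "() {" prefix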
Example #54
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)

    with timer('preprocessing'):
        val_df = df[df.fold_id == FOLD_ID]

        val_augmentation = None
        val_dataset = SeverDataset(val_df,
                                   IMG_DIR,
                                   IMG_SIZE,
                                   N_CLASSES,
                                   id_colname=ID_COLUMNS,
                                   transforms=val_augmentation)
        val_loader = DataLoader(val_dataset,
                                batch_size=BATCH_SIZE,
                                shuffle=False,
                                num_workers=8)

        del val_df, df, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp.Unet('resnet34',
                         encoder_weights="imagenet",
                         classes=N_CLASSES,
                         encoder_se_module=True,
                         decoder_semodule=True,
                         h_columns=False,
                         skip=True,
                         act="swish",
                         freeze_bn=True,
                         classification=CLASSIFICATION,
                         attention_type="cbam")
        model.load_state_dict(torch.load(base_model))
        model.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()

    with timer('predict'):
        valid_loss, y_pred, y_true, cls = predict(
            model,
            val_loader,
            criterion,
            device,
            classification=CLASSIFICATION)
        LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

        scores = []
        for i, (th, remove_mask_pixel) in enumerate(zip(ths, remove_pixels)):
            sum_val_preds = np.sum(
                y_pred[:, i, :, :].reshape(len(y_pred), -1) > th, axis=1)
            cls_ = cls[:, i]

            best = 0
            for th_cls in np.linspace(0, 1, 101):
                val_preds_ = copy.deepcopy(y_pred[:, i, :, :])
                val_preds_[sum_val_preds < remove_mask_pixel] = 0
                val_preds_[cls_ <= th_cls] = 0
                img_scores = []  # per-image dice at this threshold (kept separate from the per-class list above)
                for y_val_, y_pred_ in zip(y_true[:, i, :, :], val_preds_):
                    score = dice(y_val_, y_pred_ > 0.5)
                    if np.isnan(score):
                        img_scores.append(1)
                    else:
                        img_scores.append(score)
                if np.mean(img_scores) >= best:
                    best = np.mean(img_scores)
                    best_th = th_cls
                else:
                    break
            LOGGER.info('dice={} on {}'.format(best, best_th))
            scores.append(best)

        LOGGER.info('holdout dice={}'.format(np.mean(scores)))
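
The dice() helper is not shown in this snippet. A sketch consistent with how it is used above (it returns NaN when both masks are empty, which the caller then counts as a perfect score):

import numpy as np

def dice(y_true, y_pred):
    # Dice coefficient for a pair of binary masks; NaN when both masks are empty.
    y_true = np.asarray(y_true, dtype=bool)
    y_pred = np.asarray(y_pred, dtype=bool)
    denom = y_true.sum() + y_pred.sum()
    if denom == 0:
        return np.nan
    return 2.0 * np.logical_and(y_true, y_pred).sum() / denom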
Example #55
def main():
    train_df = pd.read_csv(TRAIN_PATH)
    fold_df = pd.read_csv(FOLD_PATH)
    n_train_df = len(train_df)

    old_folds = pd.read_csv(FOLD_PATH_JIGSAW)

    old_df = pd.read_csv(OLD_PATH)
    old_df["target"] = old_df[["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]].sum(axis=1)
    old_df["target"] = (old_df["target"] >= 1).astype("int8")
    old_df = old_df[old_folds.fold_id != fold_id]
    train_df = train_df.append(old_df).reset_index(drop=True)
    del old_folds, old_df
    gc.collect()

    # y = np.where(train_df['target'] >= 0.5, 1, 0)
    y = train_df['target'].values

    identity_columns_new = []
    for column in identity_columns + ['target']:
        train_df[column + "_bin"] = np.where(train_df[column] >= 0.5, True, False)
        if column != "target":
            identity_columns_new.append(column + "_bin")

    # Overall
    #weights = np.ones((len(train_df),)) / 4
    # Subgroup
    #weights += (train_df[identity_columns].fillna(0).values >= 0.5).sum(axis=1).astype(bool).astype(np.int) / 4
    # Background Positive, Subgroup Negative
    #weights += (((train_df["target"].values >= 0.5).astype(bool).astype(np.int) +
    #             (1 - (train_df[identity_columns].fillna(0).values >= 0.5).sum(axis=1).astype(bool).astype(
    #                 np.int))) > 1).astype(bool).astype(np.int) / 4
    # Background Negative, Subgroup Positive
    #weights += (((train_df["target"].values < 0.5).astype(bool).astype(np.int) +
    #             (train_df[identity_columns].fillna(0).values >= 0.5).sum(axis=1).astype(bool).astype(
    #                 np.int)) > 1).astype(bool).astype(np.int) / 4
    #loss_weight = 0.5

    with timer('preprocessing text'):
        # df["comment_text"] = [analyzer_embed(text) for text in df["comment_text"]]
        train_df['comment_text'] = train_df['comment_text'].astype(str)
        train_df = train_df.fillna(0)

    with timer('load embedding'):
        tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH, cache_dir=None, do_lower_case=True)
        X_text, train_lengths = convert_lines(train_df["comment_text"].fillna("DUMMY_VALUE"), max_len, tokenizer)
        del train_lengths, tokenizer
        gc.collect()

    LOGGER.info(f"X_text {X_text.shape}")

    X_old = X_text[n_train_df:].astype("int32")
    X_text = X_text[:n_train_df].astype("int32")
    #w_trans = weights[n_train_df:].astype("float32")
    #weights = weights[:n_train_df].astype("float32")
    y_old = y[n_train_df:].astype("float32")
    y = y[:n_train_df].astype("float32")
    train_df = train_df[:n_train_df]

    with timer('train'):
        train_index = fold_df.fold_id != fold_id
        valid_index = fold_df.fold_id == fold_id
        X_train, y_train = X_text[train_index].astype("int32"), y[train_index].astype("float32")
        X_val, y_val = X_text[valid_index].astype("int32"), y[valid_index].astype("float32")
        test_df = train_df[valid_index]
        del X_text, y, train_index, valid_index, train_df
        gc.collect()

        model = BertForSequenceClassification.from_pretrained(WORK_DIR, cache_dir=None, num_labels=n_labels)
        model.zero_grad()
        model = model.to(device)

        X_train = np.concatenate([X_train, X_old], axis=0)
        y_train = np.concatenate([y_train, y_old], axis=0)
        train_size = len(X_train)
        del X_old, y_old
        gc.collect()

        train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.long),
                                                       torch.tensor(y_train, dtype=torch.float32))
        valid = torch.utils.data.TensorDataset(torch.tensor(X_val, dtype=torch.long),
                                               torch.tensor(y_val, dtype=torch.float32))
        ran_sampler = torch.utils.data.RandomSampler(train_dataset)
        len_sampler = LenMatchBatchSampler(ran_sampler, batch_size=batch_size, drop_last=False)
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_sampler=len_sampler)
        valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size * 2, shuffle=False)
        del X_train, y_train, X_val, y_val
        gc.collect()
        LOGGER.info(f"done data loader setup")

        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]

        num_train_optimization_steps = int(epochs * train_size / batch_size / accumulation_steps)
        total_step = int(epochs * train_size / batch_size)

        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=base_lr,
                             warmup=0.005,
                             t_total=num_train_optimization_steps)
        LOGGER.info(f"done optimizer loader setup")

        model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)
        criterion = torch.nn.BCEWithLogitsLoss().to(device)
        #criterion = CustomLoss(loss_weight).to(device)
        LOGGER.info(f"done amp setup")

        for epoch in range(epochs):
            LOGGER.info(f"Starting {epoch} epoch...")
            LOGGER.info(f"length {train_size} train...")
            if epoch == 1:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = base_lr * gammas[1]
            tr_loss, train_losses = train_one_epoch(model, train_loader, criterion, optimizer, device,
                                                    accumulation_steps, total_step, n_labels, base_lr,
                                                    gamma=gammas[2 * epoch])
            LOGGER.info(f'Mean train loss: {round(tr_loss,5)}')

            torch.save(model.state_dict(), '{}_dic_epoch{}'.format(exp, epoch))
            torch.save(optimizer.state_dict(), '{}_optimizer_epoch{}.pth'.format(exp, epoch))

            valid_loss, oof_pred = validate(model, valid_loader, criterion, device, n_labels)
            LOGGER.info(f'Mean valid loss: {round(valid_loss,5)}')

            if epochs > 1:
                test_df_cp = test_df.copy()
                test_df_cp["pred"] = oof_pred[:, 0]
                test_df_cp = convert_dataframe_to_bool(test_df_cp)
                bias_metrics_df = compute_bias_metrics_for_model(test_df_cp, identity_columns)
                LOGGER.info(bias_metrics_df)

                score = get_final_metric(bias_metrics_df, calculate_overall_auc(test_df_cp))
                LOGGER.info(f'score is {score}')

        del model
        gc.collect()
        torch.cuda.empty_cache()

    test_df["pred"] = oof_pred[:, 0]
    test_df = convert_dataframe_to_bool(test_df)
    bias_metrics_df = compute_bias_metrics_for_model(test_df, identity_columns)
    LOGGER.info(bias_metrics_df)

    score = get_final_metric(bias_metrics_df, calculate_overall_auc(test_df))
    LOGGER.info(f'final score is {score}')

    test_df.to_csv("oof.csv", index=False)

    xs = list(range(1, len(train_losses) + 1))
    plt.plot(xs, train_losses, label='Train loss')
    plt.legend()
    plt.xticks(xs)
    plt.xlabel('Iter')
    plt.savefig("loss.png")
Example #56
def get_redgifs_gif(query: str,
                    username: str,
                    after_dark_only: bool = False) -> Optional[str]:
    """
    Fetch a special kind of gif, if you know what I mean ;).

    :param str query: Gif search query.
    :param str username: Chatango user who triggered the command.
    :param bool after_dark_only: Whether results should be limited to the `after dark` timeframe.

    :returns: Optional[str]
    """
    try:
        night_mode = is_after_dark()
        if (after_dark_only and night_mode) or after_dark_only is False:
            token = redgifs_auth_token()
            endpoint = REDGIFS_IMAGE_SEARCH_ENDPOINT
            params = {
                "search_text": query.title(),
                "order": "trending",
                "count": 80
            }
            headers = {"Authorization": f"Bearer {token}"}
            resp = requests.get(endpoint, params=params, headers=headers)
            results = resp.json().get("gifs", None)
            if resp.status_code == 200 and results is not None:
                results = [
                    result for result in results
                    if result["urls"].get("sd") is not None
                ]
                if bool(results):
                    rand = randint(0, len(results) - 1)
                    image_json = results[rand]
                    return get_full_gif_metadata(image_json)
                elif username == "thegreatpizza":
                    return emojize(
                        f":pizza: *h* wow pizza ur taste in lesbians is so dank that I coughldnt find nething sry :( *h* :pizza:",
                        use_aliases=True,
                    )
                elif username == "broiestbro":
                    return emojize(
                        f":@ bro u fgt wot r u searching 4 go2bed :@",
                        use_aliases=True,
                    )
                else:
                    return emojize(
                        f":warning: wow @{username} u must b a freak tf r u even searching foughr jfc :warning:",
                        use_aliases=True,
                    )
            else:
                LOGGER.error(
                    f"Error {resp.status_code} fetching NSFW gif: {resp.content}"
                )
                return emojize(
                    f":warning: omfg @{username} u broke bot with ur kinky ass bs smfh :warning:",
                    use_aliases=True,
                )
        return "https://i.imgur.com/oGMHkqT.jpg"
    except HTTPError as e:
        LOGGER.warning(
            f"HTTPError while fetching nsfw image for `{query}`: {e.response.content}"
        )
        return emojize(
            f":warning: yea nah idk wtf ur searching for :warning:",
            use_aliases=True,
        )
    except IndexError as e:
        LOGGER.warning(
            f"IndexError while fetching nsfw image for `{query}`: {e}")
        return emojize(
            f":warning: yea nah idk wtf ur searching for :warning:",
            use_aliases=True,
        )
    except Exception as e:
        LOGGER.warning(
            f"Unexpected error while fetching nsfw image for `{query}`: {e}")
        return emojize(
            f":warning: dude u must b a freak cuz that just broke bot :warning:",
            use_aliases=True,
        )
Example #57
 def on_exchange_declareok(self, _unused_frame, userdata):
     LOGGER.info('Exchange declared: %s', userdata)
     self.setup_queue(self.QUEUE)
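
setup_queue is not included in this snippet; a sketch modeled on the standard pika asynchronous-consumer pattern (the channel attribute and follow-up callback name are assumptions):

 def setup_queue(self, queue_name):
     """
     Declare the queue on the channel before starting to consume.
     """
     LOGGER.info('Declaring queue %s', queue_name)
     self._channel.queue_declare(queue=queue_name,
                                 callback=lambda frame: self.on_queue_declareok(frame, queue_name))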
Example #58
def main():
    train_df = pd.read_csv(TRAIN_PATH)
    train_df['male'] = np.load(
        "../input/identity-column-data/male_labeled.npy")
    train_df['female'] = np.load(
        "../input/identity-column-data/female_labeled.npy")
    train_df['homosexual_gay_or_lesbian'] = np.load(
        "../input/identity-column-data/homosexual_gay_or_lesbian_labeled.npy")
    train_df['christian'] = np.load(
        "../input/identity-column-data/christian_labeled.npy")
    train_df['jewish'] = np.load(
        "../input/identity-column-data/jewish_labeled.npy")
    train_df['muslim'] = np.load(
        "../input/identity-column-data/muslim_labeled.npy")
    train_df['black'] = np.load(
        "../input/identity-column-data/black_labeled.npy")
    train_df['white'] = np.load(
        "../input/identity-column-data/white_labeled.npy")
    train_df['psychiatric_or_mental_illness'] = np.load(
        "../input/identity-column-data/psychiatric_or_mental_illness_labeled.npy"
    )
    fold_df = pd.read_csv(FOLD_PATH)

    # y = np.where(train_df['target'] >= 0.5, 1, 0)
    y = train_df['target'].values
    y_aux = train_df[AUX_COLUMNS].values

    identity_columns_new = []
    for column in identity_columns + ['target']:
        train_df[column + "_bin"] = np.where(train_df[column] >= 0.5, True,
                                             False)
        if column != "target":
            identity_columns_new.append(column + "_bin")

    # Overall
    weights = np.ones((len(train_df), )) / 4
    # Subgroup
    weights += (train_df[identity_columns].fillna(0).values >= 0.5).sum(
        axis=1).astype(bool).astype(np.int) / 4
    # Background Positive, Subgroup Negative
    weights += (
        ((train_df["target"].values >= 0.5).astype(bool).astype(np.int) +
         (1 - (train_df[identity_columns].fillna(0).values >= 0.5).sum(
             axis=1).astype(bool).astype(np.int))) > 1).astype(bool).astype(
                 np.int) / 4
    # Background Negative, Subgroup Positive
    weights += (
        ((train_df["target"].values < 0.5).astype(bool).astype(np.int) +
         (train_df[identity_columns].fillna(0).values >= 0.5).sum(
             axis=1).astype(bool).astype(np.int)) > 1).astype(bool).astype(
                 np.int) / 4
    loss_weight = 0.5

    with timer('preprocessing text'):
        # df["comment_text"] = [analyzer_embed(text) for text in df["comment_text"]]
        train_df['comment_text'] = train_df['comment_text'].astype(str)
        train_df = train_df.fillna(0)

    with timer('load embedding'):
        tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH,
                                                  cache_dir=None,
                                                  do_lower_case=False)
        X_text = convert_lines_head_tail(
            train_df["comment_text"].fillna("DUMMY_VALUE"), max_len, head_len,
            tokenizer)
        del tokenizer
        gc.collect()

    LOGGER.info(f"X_text {X_text.shape}")

    with timer('train'):
        train_index = fold_df.fold_id != fold_id
        valid_index = fold_df.fold_id == fold_id
        X_train, y_train, y_aux_train, w_train = X_text[train_index].astype(
            "int32"), y[train_index], y_aux[train_index], weights[train_index]
        X_val, y_val, y_aux_val, w_val = (X_text[valid_index].astype("int32"), y[valid_index],
                                          y_aux[valid_index], weights[valid_index])
        test_df = train_df[valid_index]
        del X_text, y, y_aux, weights, train_index, valid_index, train_df
        gc.collect()

        model = BertForSequenceClassification(bert_config, num_labels=n_labels)
        model.load_state_dict(torch.load(model_path))
        model.zero_grad()
        model = model.to(device)

        y_train = np.concatenate(
            (y_train.reshape(-1, 1), w_train.reshape(-1, 1), y_aux_train),
            axis=1).astype("float32")
        y_val = np.concatenate(
            (y_val.reshape(-1, 1), w_val.reshape(-1, 1), y_aux_val),
            axis=1).astype("float32")

        train_dataset = torch.utils.data.TensorDataset(
            torch.tensor(X_train, dtype=torch.long),
            torch.tensor(y_train, dtype=torch.float32))
        valid = torch.utils.data.TensorDataset(
            torch.tensor(X_val, dtype=torch.long),
            torch.tensor(y_val, dtype=torch.float32))
        ran_sampler = torch.utils.data.RandomSampler(train_dataset)
        len_sampler = LenMatchBatchSampler(ran_sampler,
                                           batch_size=batch_size,
                                           drop_last=False)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_sampler=len_sampler)
        valid_loader = torch.utils.data.DataLoader(valid,
                                                   batch_size=batch_size * 2,
                                                   shuffle=False)
        LOGGER.info(f"done data loader setup")

        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.01
        }, {
            'params':
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay':
            0.0
        }]

        num_train_optimization_steps = int(epochs * len(X_train) / batch_size /
                                           accumulation_steps)
        total_step = int(epochs * len(X_train) / batch_size)

        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=base_lr,
                             warmup=0.005,
                             t_total=num_train_optimization_steps)
        LOGGER.info(f"done optimizer loader setup")

        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level="O1",
                                          verbosity=0)
        # criterion = torch.nn.BCEWithLogitsLoss().to(device)
        criterion = CustomLoss(loss_weight).to(device)
        LOGGER.info(f"done amp setup")

        for epoch in range(1, epochs + 1):
            LOGGER.info(f"Starting {epoch} epoch...")
            LOGGER.info(f"length {len(X_train)} train {len(X_val)} train...")
            if epoch == 1:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = base_lr * gammas[1]
            tr_loss, train_losses = train_one_epoch(model,
                                                    train_loader,
                                                    criterion,
                                                    optimizer,
                                                    device,
                                                    accumulation_steps,
                                                    total_step,
                                                    n_labels,
                                                    base_lr,
                                                    gamma=gammas[2 * epoch])
            LOGGER.info(f'Mean train loss: {round(tr_loss,5)}')

            torch.save(model.state_dict(),
                       '{}_epoch{}_fold{}.pth'.format(exp, epoch, fold_id))

            valid_loss, oof_pred = validate(model, valid_loader, criterion,
                                            device, n_labels)
            LOGGER.info(f'Mean valid loss: {round(valid_loss,5)}')

        del model
        gc.collect()
        torch.cuda.empty_cache()

    test_df["pred"] = oof_pred[:, 0]
    test_df = convert_dataframe_to_bool(test_df)
    bias_metrics_df = compute_bias_metrics_for_model(test_df, identity_columns)
    LOGGER.info(bias_metrics_df)

    score = get_final_metric(bias_metrics_df, calculate_overall_auc(test_df))
    LOGGER.info(f'final score is {score}')

    test_df.to_csv("oof.csv", index=False)

    xs = list(range(1, len(train_losses) + 1))
    plt.plot(xs, train_losses, label='Train loss')
    plt.legend()
    plt.xticks(xs)
    plt.xlabel('Iter')
    plt.savefig("loss.png")
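
convert_lines_head_tail is not defined here. A sketch under the assumption that it performs head+tail truncation, keeping the first head_len word-piece tokens and the last (max_len - head_len - 2) tokens of each over-long comment before padding with zeros:

import numpy as np

def convert_lines_head_tail(texts, max_len, head_len, tokenizer):
    tail_len = max_len - head_len - 2                 # leave room for [CLS] and [SEP]
    all_ids = np.zeros((len(texts), max_len), dtype=np.int64)
    for i, text in enumerate(texts):
        tokens = tokenizer.tokenize(str(text))
        if len(tokens) > max_len - 2:
            tokens = tokens[:head_len] + tokens[-tail_len:]
        ids = tokenizer.convert_tokens_to_ids(["[CLS]"] + tokens + ["[SEP]"])
        all_ids[i, :len(ids)] = ids
    return all_ids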
Example #59
 def __del__(self):
     LOGGER.info("RemoteController exited")
Example #60
def mri_wrapper(ctx, input_folder, from_loris=False):

    # get config.json
    config = ctx.obj['cfgjson']

    # get the folders from config.json
    script_parallel_path = os.path.abspath('./mri_run_parallel')
    script_merge_path = os.path.abspath('./mri_output_merge')

    mri_raw_root = os.path.abspath(
        config['mri']['input_folders']['nifti']['raw'])
    mri_raw_folder = os.path.join(mri_raw_root, input_folder)

    mri_input_root = os.path.abspath(
        config['mri']['input_folders']['nifti']['organized'])
    mri_input_folder = os.path.join(mri_input_root, input_folder)

    imaging_root = os.path.abspath(config['mipmap']['input_folder']['imaging'])
    imaging_source_path = os.path.join(imaging_root, input_folder)

    mri_output_spm12_root = config['mri']['output_folders']['spm12']
    mri_output_spm12_folder = os.path.join(mri_output_spm12_root, input_folder)

    if not from_loris:
        # Reorganize mri files
        LOGGER.info('Reorganizing nifti files in folder %s' % mri_input_folder)
        run_cmd = 'python2 mri_nifti_reorganize/organizer.py %s %s' % (
            mri_raw_folder, mri_input_folder)
        os.system(run_cmd)
    else:
        LOGGER.info(
            'Skipping NIFTI reorganization step, files already organized by LORIS-for-MIP'
        )

    # run matlab spm12 script

    LOGGER.info('Running spm12 pipeline...')
    LOGGER.info('Storing output files in %s' % mri_output_spm12_folder)
    os.chdir(script_parallel_path)
    run_cmd = 'python2 mri_parallel_preprocessing.py %s %s' % (
        mri_input_folder, mri_output_spm12_folder)
    LOGGER.info('Executing...%s' % run_cmd)
    os.system(run_cmd)
    # merge the output into one csv
    os.chdir(script_merge_path)
    run_cmd = 'python2 merge.py %s %s' % (mri_output_spm12_folder,
                                          imaging_source_path)
    LOGGER.info(
        'Merging spm12 output pipeline into single csv file in folder %s' %
        imaging_source_path)
    os.system(run_cmd)