Example #1
 def run_cmd(self, *args):
     """Run the help command and return its text"""
     if not self.option:
         # No option given: return the general help text
         response = self.config["help"]
     else:
         # Load the help text for the requested option
         option_help = get_config(f'{self.option}.json')
         response = option_help["help"]
     return "\n".join(response)
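Every example on this page calls get_config without showing its definition. For the JSON-based examples like the one above, a minimal sketch might look like the following, assuming the files live in a local configs/ directory (the CONFIG_DIR constant is hypothetical):

import json
import os

# Hypothetical directory holding the JSON config files used in these examples
CONFIG_DIR = os.path.join(os.path.dirname(__file__), 'configs')

def get_config(filename):
    """Load and return the parsed contents of a JSON config file."""
    with open(os.path.join(CONFIG_DIR, filename)) as f:
        return json.load(f)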
Example #2
 def __init__(self, event, user):
     self.config = get_config('example.json')
     self.parsed_args = SlackArgParse(self.config['valid_args'],
                                      self.config['options'], event['text'])
     self.args = self.parsed_args.args
     self.option = self.parsed_args.option
     self.response = self.run_cmd()
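SlackArgParse is also not defined on this page. A plausible minimal sketch, assuming it extracts a recognized option and any known arguments from the message text (the attribute names args and option come from the example above; the parsing logic itself is an assumption):

class SlackArgParse:
    """Sketch: pull a known option and recognized args out of Slack message text."""

    def __init__(self, valid_args, options, text):
        tokens = text.split()
        # First token that matches a configured option, if any
        self.option = next((t for t in tokens if t in options), None)
        # Keep only the tokens that are recognized arguments
        self.args = [t for t in tokens if t in valid_args]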
Example #3
 def __init__(self, args, option=None, team=None, player=None):
     self.args = args
     self.option = option
     self.team = team
     self.player = player
     self.config = get_config('nhl_config.json')
     self.emojis = self.config['emojis']
     self.nhl = NHL()
Example #4
 def __init__(self, event, user):
     self.config = get_config('help.json')
     try:
         self.option = event["text"].split()[1]
     except IndexError:
         # No option was supplied after the command word
         self.option = False
     self.user = event["user"]
Example #5
 def __init__(self, event, user):
     self.text = event['text']
     self.config = get_config('config.json')
     self.parsed_args = SlackArgParse(self.config['valid_args'],
                                      self.config['options'],
                                      event['text'].lower())
     self.args = self.parsed_args.args
     self.option = self.parsed_args.option
     self.league = self._get_league()
     self.team_name = self._get_team_name()
     self.response = self.run_cmd()
Example #6
def get_general_page():
    PATH2ROOT = Path('..')
    PATH2CONFIG = Path(PATH2ROOT / 'configs')
    CONFIG = get_config(PATH2CONFIG / 'config.yml')

    tag_map = get_tag_map(PATH2CONFIG)
    inv_tag_map = get_inv_tag_map(tag_map)

    device = get_device(CONFIG)
    model = get_model(PATH2ROOT, CONFIG, device, len(tag_map.keys()))

    st.title('Named Entity Recognition using BERT')
    st.subheader(
        'This simple application provides a way to visualize the token classification task (NER)💡'
    )

    text_key = st.selectbox('Select an example or type your own text',
                            list(TEXT_EXAMPLES.keys()))

    raw_text = st.text_area('Enter text here', value=TEXT_EXAMPLES[text_key])
    raw_text_split = raw_text.split()

    test_dataset = NamedEntityRecognitionDataset(
        texts=[raw_text_split],
        tags=[[0] * len(raw_text_split)],
        tokenizer=CONFIG['model']['model_name'],
        max_seq_len=CONFIG['model']['max_seq_length'],
        lazy_mode=False,
    )

    if st.button('Submit🔥'):
        if raw_text == '':
            st.warning('Please, enter some text...')
            return
        elif len(raw_text_split) > CONFIG['model']['max_seq_length']:
            st.warning(
                f"Text should be shorter than {CONFIG['model']['max_seq_length']} tokens..."
            )
            return
        ann_text = get_prediction(model, test_dataset, device)

        annotated_text(*[
            text + " " if class_ == tag_map['O'][0]
            else (text + " ", inv_tag_map[class_][0], inv_tag_map[class_][1])
            for text, class_ in ann_text
        ])

        ann_text = np.array(ann_text)
        db_insert(
            raw_text,
            json.dumps(ann_text[:, 0].tolist(), ensure_ascii=False),
            json.dumps(CONFIG),
            json.dumps(ann_text[:, 1].tolist(), ensure_ascii=False),
        )
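Examples #6, #9, and #13 pass get_config a Path to a config.yml, so that variant has to parse YAML rather than JSON. A minimal sketch, assuming PyYAML is installed and the loader dispatches on the file suffix:

import json
from pathlib import Path

import yaml  # PyYAML; assumed, not shown anywhere on this page

def get_config(path):
    """Load a config file, choosing the parser from the file extension."""
    path = Path(path)
    with open(path) as f:
        if path.suffix in ('.yml', '.yaml'):
            return yaml.safe_load(f)
        return json.load(f)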
Example #7
    def __init__(self, token):
        """
        Initialize the Slack object with the provided bot token

        :param token: Slack bot API token
        """
        self.config = get_config('slack.json')
        self.client = slackclient.SlackClient(token)
        self.commands = self.load_commands('/jockbot/commands/')
Example #8
 def __init__(self, api_version="1.2"):
     self.api_key = os.environ.get('MYSPORTSFEEDS_API_KEY')
     self.version = api_version
     self.password = os.environ.get('MYSPORTSFEEDS_PASSWORD')
     self.date = datetime.datetime.now()
     self.base_url = f"https://api.mysportsfeeds.com/v{self.version}/pull/nfl/"
     self.session = requests.Session()
     self.league_schedule = self.get_schedule()
     self.upcoming_games = self.get_games_by_week()
     self.config = get_config('nfl_config.json')
     self.league_game_results = []
     self.played_games = []
     self.unplayed_games = []
     self.league_played_games = []
     self.league_unplayed_games = []
     self.loop = asyncio.new_event_loop()
     self.loop.run_until_complete(self.parse_league_games())
     self.loop.run_until_complete(self.gather_league_data())
Example #9
def get_about_page():
    st.title('About')

    PATH2ROOT = Path('..')
    PATH2CONFIG = Path(PATH2ROOT / 'configs')
    CONFIG = get_config(PATH2CONFIG / 'config.yml')

    st.header('Model parameters')
    st.json(CONFIG)

    st.header('Loss function plot')
    st.markdown(
        '![](https://github.com/kryvokhyzha/bert-for-ukranian-ner/blob/main/imgs/tb_focal_loss.png?raw=true)'
    )

    st.header('Accuracy plot')
    st.markdown(
        '![](https://github.com/kryvokhyzha/bert-for-ukranian-ner/blob/main/imgs/tb_accuracy.png?raw=true)'
    )
Example #10
from contextlib import closing
from mysql.connector import OperationalError
from utils.helpers import get_config, get_connection

if __name__ == '__main__':
    config = get_config()
    with closing(get_connection(**config)) as conn:
        print('Connection successful')

        c = conn.cursor()
        update = """
        update customer as c, address as a
        set c.email = %s, a.phone = %s
        where c.address_id = a.address_id and c.customer_id = 100;
        """
        email = '*****@*****.**'
        phone = '949-555-1234'
        c.execute(update, (email, phone))
        conn.commit()
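Example #10 calls get_config() with no arguments and splats the result into get_connection, which implies the config is a plain dict of MySQL connection parameters. A minimal sketch of both helpers under that assumption (the filename and key names are invented for illustration):

import json

import mysql.connector

def get_config(path='db_config.json'):
    """Return connection kwargs such as host, user, password, and database."""
    with open(path) as f:
        return json.load(f)

def get_connection(**kwargs):
    """Open a MySQL connection using the parsed config."""
    return mysql.connector.connect(**kwargs)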
Example #11
def main(data_path='/data/SN6_buildings/train/AOI_11_Rotterdam/',
         config_path='/project/configs/senet154_gcc_fold1.py',
         gpu='0'):

    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    config = get_config(config_path)
    model_name = config['model_name']
    fold_number = config['fold_number']
    alias = config['alias']
    log_path = osp.join(config['logs_path'],
                        alias + str(fold_number) + '_' + model_name)

    device = torch.device(config['device'])
    weights = config['weights']
    loss_name = config['loss']
    optimizer_name = config['optimizer']
    lr = config['lr']
    decay = config['decay']
    momentum = config['momentum']
    epochs = config['epochs']
    fp16 = config['fp16']
    n_classes = config['n_classes']
    input_channels = config['input_channels']
    main_metric = config['main_metric']

    best_models_count = config['best_models_count']
    minimize_metric = config['minimize_metric']
    min_delta = config['min_delta']

    train_images = data_path
    data_type = config['data_type']
    masks_data_path = config['masks_data_path']
    folds_file = config['folds_file']
    train_augs = config['train_augs']
    preprocessing_fn = config['preprocessing_fn']
    limit_files = config['limit_files']
    batch_size = config['batch_size']
    shuffle = config['shuffle']
    num_workers = config['num_workers']
    valid_augs = config['valid_augs']
    val_batch_size = config['val_batch_size']
    multiplier = config['multiplier']

    train_dataset = SemSegDataset(images_dir=train_images,
                                  data_type=data_type,
                                  masks_dir=masks_data_path,
                                  mode='train',
                                  n_classes=n_classes,
                                  folds_file=folds_file,
                                  fold_number=fold_number,
                                  augmentation=train_augs,
                                  preprocessing=preprocessing_fn,
                                  limit_files=limit_files,
                                  multiplier=multiplier)

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=shuffle,
                              num_workers=num_workers)

    valid_dataset = SemSegDataset(images_dir=train_images,
                                  data_type=data_type,
                                  mode='valid',
                                  folds_file=folds_file,
                                  n_classes=n_classes,
                                  fold_number=fold_number,
                                  augmentation=valid_augs,
                                  preprocessing=preprocessing_fn,
                                  limit_files=limit_files)

    valid_loader = DataLoader(dataset=valid_dataset,
                              batch_size=val_batch_size,
                              shuffle=False,
                              num_workers=num_workers)

    model = make_model(model_name=model_name,
                       weights=weights,
                       n_classes=n_classes,
                       input_channels=input_channels).to(device)

    loss = get_loss(loss_name=loss_name)
    optimizer = get_optimizer(optimizer_name=optimizer_name,
                              model=model,
                              lr=lr,
                              momentum=momentum,
                              decay=decay)

    if config['scheduler'] == 'reduce_on_plateau':
        print('reduce lr')
        alpha = config['alpha']
        patience = config['patience']
        threshold = config['threshold']
        min_lr = config['min_lr']
        mode = config['scheduler_mode']
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer=optimizer,
            factor=alpha,
            verbose=True,
            patience=patience,
            mode=mode,
            threshold=threshold,
            min_lr=min_lr)
    elif config['scheduler'] == 'steps':
        print('steps lr')
        steps = config['steps']
        step_gamma = config['step_gamma']
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizer,
                                                         milestones=steps,
                                                         gamma=step_gamma)
    else:
        scheduler = None

    callbacks = []

    dice_callback = DiceCallback()
    callbacks.append(dice_callback)
    callbacks.append(CheckpointCallback(save_n_best=best_models_count))
    callbacks.append(
        EarlyStoppingCallback(patience=config['early_stopping'],
                              metric=main_metric,
                              minimize=minimize_metric,
                              min_delta=min_delta))

    runner = SupervisedRunner(device=device)
    loaders = {'train': train_loader, 'valid': valid_loader}

    runner.train(model=model,
                 criterion=loss,
                 optimizer=optimizer,
                 loaders=loaders,
                 scheduler=scheduler,
                 callbacks=callbacks,
                 logdir=log_path,
                 num_epochs=epochs,
                 verbose=True,
                 main_metric=main_metric,
                 minimize_metric=minimize_metric,
                 fp16=fp16)
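Examples #11 and #12 point get_config at a .py file, which suggests the config is a Python module that defines its settings as data, a common pattern in segmentation pipelines. A minimal sketch, assuming the config module exposes a top-level dict named config (that variable name is an assumption):

import importlib.util

def get_config(config_path):
    """Execute a Python config file and return its top-level `config` dict."""
    spec = importlib.util.spec_from_file_location('run_config', config_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module.config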
Example #12
def main(config_path='/project/configs/senet154_gcc_fold1.py',
         test_images='/data/SN6_buildings/test_public/AOI_11_Rotterdam/',
         test_predict_result='/wdata/folds_predicts/',
         batch_size=1,
         workers=1,
         gpu='1'):

    with torch.no_grad():

        config = get_config(config_path)
        model_name = config['model_name']
        weights_path = config['load_from']
        device = config['device']
        val_batch_size = batch_size
        input_channels = config['input_channels']

        original_size = config['original_size']
        cropper = albu.Compose(
            [albu.CenterCrop(original_size[0], original_size[1], p=1.0)])
        n_classes = config['n_classes']
        preprocessing_fn = config['preprocessing_fn']
        valid_augs = config['valid_augs']
        limit_files = config['limit_files']
        num_workers = workers
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu
        if not os.path.exists(test_predict_result):
            os.mkdir(test_predict_result)
        fold_name = weights_path.split('/')[-3]
        folder_to_save = os.path.join(test_predict_result, fold_name)
        if os.path.exists(folder_to_save):
            shutil.rmtree(folder_to_save)

        os.mkdir(folder_to_save)

        test_dataset = TestSemSegDataset(
            images_dir=os.path.join(test_images, 'SAR-Intensity'),
            preprocessing=preprocessing_fn,
            augmentation=valid_augs,
            limit_files=limit_files)

        test_loader = DataLoader(dataset=test_dataset,
                                 batch_size=val_batch_size,
                                 shuffle=False,
                                 num_workers=num_workers)
        print('Loading {}'.format(weights_path))
        model = make_model(model_name=model_name,
                           weights=None,
                           n_classes=n_classes,
                           input_channels=input_channels).to(device)

        model.load_state_dict(torch.load(weights_path)['model_state_dict'])

        model.eval()
        model = tta.TTAWrapper(model, flip_image2mask)
        model = torch.nn.DataParallel(model).cuda()

        file_names = sorted(test_dataset.ids)

        for batch_i, test_batch in enumerate(tqdm(test_loader)):
            runner_out = model(test_batch.cuda())
            image_pred = runner_out

            image_pred = image_pred.cpu().detach().numpy()
            names = file_names[batch_i * val_batch_size:(batch_i + 1) *
                               val_batch_size]
            for i in range(len(names)):
                file_name = os.path.join(folder_to_save,
                                         names[i].split('.')[0] + '.png')

                data = image_pred[i, ...]
                data = np.moveaxis(data, 0, -1)
                sample = cropper(image=data)
                data = sample['image']
                data = (data * 255).astype(np.uint8)
                cv2.imwrite(file_name, data)
Example #13
import json
from pathlib import Path

import joblib
import torch
from catalyst.utils import prepare_cudnn, set_global_seed
from sklearn import model_selection
from transformers import AdamW, get_linear_schedule_with_warmup

from datasets import NamedEntityRecognitionDataset
from models import NamedEntityRecognitionBertModel
from models.loss_fn import FocalLossCustom
from utils.callbacks import AccuracyCallbackCustom
from utils.helpers import get_config, remove_dir
from utils.runners import CustomRunner

PATH2ROOT = Path('..')
PATH2CONFIG = Path(PATH2ROOT / 'configs')
CONFIG = get_config(PATH2CONFIG / 'config.yml')
PATH2CORPUS = Path(PATH2ROOT / CONFIG['data']['path_to_corpus_folder'])
MODEL_NAME = CONFIG['model']['model_name']

if __name__ == "__main__":
    data = joblib.load(PATH2ROOT / CONFIG['data']['path_to_preproc_data'])
    texts = data['text'].values.tolist()
    tags = data['tags'].values.tolist()

    with open(PATH2CONFIG / 'target_mapper.json', 'r') as file:
        tag_map = json.load(file)

    tag_map = {key: val[0] for key, val in tag_map.items()}

    # Convert each tag string to its integer id using the mapper
    for i in range(len(tags)):
        for j in range(len(tags[i])):
            tags[i][j] = tag_map[tags[i][j]]