# Train an EfficientNet-B0 with a single-output head in three one-cycle runs,
# writing a checkpoint after each run.
arch = 'efficientnet-b0'
model_name = f'{arch}-v1'

# Parameters for the entire model (stem, all blocks, and head)
md_ef = EfficientNet.from_pretrained(arch, num_classes=1, dropout_rate=0.5)
# Alternative backbone (disabled): md_ef = resnet50(pretrained=False, num_classes=1)

learn = Learner(
    data,
    md_ef,
    opt_func=optar,
    metrics=[accuracy_thresh],
    model_dir='fastai-class1',
).to_fp16()
learn.path = Path(DATA_BASE_PATH)

# Each entry is (checkpoint name, peak learning rate). Every run unfreezes
# the learner, trains for 10 epochs and then saves under the given name.
for checkpoint, peak_lr in (
    (model_name, 1e-2),
    (f'{model_name}-stage2', 1e-3),
    (f'{model_name}-stage3', 1e-3),
):
    learn.unfreeze()
    learn.fit_one_cycle(10, max_lr=peak_lr)
    learn.save(checkpoint)
class CoruscantModel:
    """Fine-tune a pretrained BERT sequence classifier through fastai.

    The pipeline (see ``make_model``) loads the CSV files, wraps the BERT
    tokenizer/vocab for fastai, builds a ``TextDataBunch``, creates a
    ``Learner`` around ``BertForSequenceClassification`` and trains it in
    three one-cycle stages.

    Relies on names defined elsewhere in the module (``log``,
    ``FastAiBertTokenizer``, ``bert_clas_split``, ``pad_collate`` ...).
    """

    # Defaults are stored as immutable tuples and copied per instance, so one
    # instance mutating its lists can never affect another instance.
    _DEFAULT_LIST_FILES = ("train.csv", "test.csv")
    _DEFAULT_LABEL_COLS = (
        "toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"
    )

    type_pretrained = None
    data_root = None
    list_files = None
    model_dir = None
    tokenizer_pretrained_coruscant = None
    coruscant_vocab = None
    coruscant_tokenizer = None

    # data bunch
    data_bunch = None
    batch_size = None

    # data to feed the model
    train = None
    test = None
    val = None

    # model
    bert_model_class = None
    loss_func = None
    acc_02 = None
    model = None
    learner = None

    # constants
    label_cols = None
    text_cols = None

    def __init__(self,
                 type_pretrained='BERT',
                 text_cols="comment_text",
                 list_files=None,
                 label_cols=None,
                 data_root=Path("..") / "api/app/dataset/jigsaw",
                 model_dir='model',
                 batch_size=12):
        """Store the configuration and load the pretrained tokenizer.

        Args:
            type_pretrained: Name of the pretrained family. Only ``'BERT'``
                triggers loading of a tokenizer; any other value leaves
                ``tokenizer_pretrained_coruscant`` as ``None``.
            text_cols: Text column(s) passed to ``TextDataBunch.from_df``.
            list_files: Two CSV file names (train, test) under ``data_root``.
                ``None`` means ``["train.csv", "test.csv"]``.
            label_cols: Label column(s) passed to ``TextDataBunch.from_df``.
                ``None`` means the six Jigsaw toxicity columns.
            data_root: Directory containing the CSV files.
            model_dir: Directory handed to the fastai ``Learner``.
            batch_size: Batch size for the data bunch.
        """
        if list_files is None:
            list_files = list(self._DEFAULT_LIST_FILES)
        if label_cols is None:
            label_cols = list(self._DEFAULT_LABEL_COLS)

        self.data_root = data_root
        self.model_dir = model_dir
        self.batch_size = batch_size
        self.label_cols = label_cols
        self.text_cols = text_cols
        self.list_files = list_files
        self.type_pretrained = type_pretrained

        gc.collect()
        # Lazy %-formatting: does not raise if type_pretrained is not a str.
        log.debug('type_pretrained: %s', type_pretrained)
        if self.type_pretrained == 'BERT':
            self.tokenizer_pretrained_coruscant = BertTokenizer.from_pretrained(
                "bert-base-uncased")

    def make_model(self):
        """Run the whole pipeline: data, tokenizer, data bunch, model, training."""
        log.debug('----- set_train_val_data ------')
        self.set_train_val_data()
        log.debug('----- set_vocab_tokenizer ------')
        self.set_vocab_tokenizer()
        log.debug('----- set_data_bunch ------')
        self.set_data_bunch()
        log.debug('----- create_model ------')
        self.create_model()
        log.debug('----- train_and_save ------')
        self.train_save()

    def set_data_bunch(self):
        """Build the fastai ``TextDataBunch`` from the train/validation frames."""
        self.data_bunch = TextDataBunch.from_df(
            ".",
            self.train,
            self.val,
            tokenizer=self.coruscant_tokenizer,
            vocab=self.coruscant_vocab,
            include_bos=False,
            include_eos=False,
            text_cols=self.text_cols,
            label_cols=self.label_cols,
            bs=self.batch_size,
            collate_fn=partial(pad_collate, pad_first=False, pad_idx=0),
        )

    def set_train_val_data(self):
        """Read the CSV files and carve a 20% validation split off the train set.

        ``list_files`` must contain exactly two names (train, test); the
        unpacking below raises ``ValueError`` otherwise.
        """
        self.train, self.test = [
            pd.read_csv(self.data_root / fname) for fname in self.list_files
        ]
        # Fixed random_state keeps the split reproducible between runs.
        self.train, self.val = train_test_split(self.train,
                                                shuffle=True,
                                                test_size=0.2,
                                                random_state=42)

    def set_vocab_tokenizer(self):
        """Wrap the BERT vocab and tokenizer in their fastai counterparts."""
        self.coruscant_vocab = Vocab(
            list(self.tokenizer_pretrained_coruscant.vocab.keys()))
        # pre_rules/post_rules are emptied so that only the wrapped BERT
        # tokenizer function is applied.
        self.coruscant_tokenizer = Tokenizer(
            tok_func=FastAiBertTokenizer(self.tokenizer_pretrained_coruscant,
                                         max_seq_len=256),
            pre_rules=[],
            post_rules=[])

    def create_model(self):
        """Create the BERT classifier, its loss/metric and the fastai Learner."""
        # One output per label column (6 for the default Jigsaw columns)
        # instead of a hard-coded constant, so custom label_cols still work.
        if isinstance(self.label_cols, (str, int)):
            num_labels = 1
        else:
            num_labels = len(self.label_cols)

        self.bert_model_class = BertForSequenceClassification.from_pretrained(
            'bert-base-uncased', num_labels=num_labels)

        # Binary cross entropy on logits.
        self.loss_func = nn.BCEWithLogitsLoss()

        # Multi-label problem: plain accuracy is not usable, so use
        # accuracy_thresh with a 25% threshold.
        self.acc_02 = partial(accuracy_thresh, thresh=0.25)

        self.model = self.bert_model_class

        self.learner = Learner(self.data_bunch,
                               self.model,
                               loss_func=self.loss_func,
                               model_dir=self.model_dir,
                               metrics=self.acc_02)

    def train_save(self):
        """Train in three one-cycle stages, checkpointing after the first two.

        NOTE(review): the weights produced by the last (fully unfrozen) stage
        are not saved by this method; '<type>_final' holds the stage-two
        weights. Confirm whether that is intended.
        """
        x = bert_clas_split(self.model)

        # Five layer groups are selected (x[4] is skipped); the wd tuples
        # below also have five entries -- presumably one per group.
        self.learner.split([x[0], x[1], x[2], x[3], x[5]])

        self.learner.lr_find()
        self.learner.fit_one_cycle(2,
                                   max_lr=slice(1e-5, 5e-4),
                                   moms=(0.8, 0.7),
                                   pct_start=0.2,
                                   wd=(1e-7, 1e-5, 1e-4, 1e-3, 1e-2))
        self.learner.save(self.type_pretrained + '_first')
        self.learner.load(self.type_pretrained + '_first')

        # Keep only the last two layer groups trainable and train again.
        self.learner.freeze_to(-2)
        self.learner.fit_one_cycle(2,
                                   max_lr=slice(1e-5, 5e-4),
                                   moms=(0.8, 0.7),
                                   pct_start=0.2,
                                   wd=(1e-7, 1e-5, 1e-4, 1e-3, 1e-2))
        self.learner.save(self.type_pretrained + '_final')
        self.learner.load(self.type_pretrained + '_final')

        # Unfreeze the entire model and train it with lower learning rates.
        self.learner.unfreeze()
        self.learner.lr_find()
        self.learner.fit_one_cycle(2,
                                   slice(5e-6, 5e-5),
                                   moms=(0.8, 0.7),
                                   pct_start=0.2,
                                   wd=(1e-7, 1e-5, 1e-4, 1e-3, 1e-2))

    def test_prediction(self):
        """Log the learner's prediction for a fixed set of sample sentences."""
        samples = (
            'you are so sweet',
            'you are pathetic piece of shit',
            ("what’s so great about return of the jedi? the special effects are abysmal, and the acting is "
             "horrible. it’s like they phoned it in. it’s a mess."),
            "i hate myself for being too human. how do i liberate my soul ?",
            "why was guru arjun singh killed by jahangir?",
            ("funny how the person that bullies you in elementary is ugly as f**k in high school, and your high "
             "school bull1, a loser in college..."),
            "stop making fun of amy winehouse and michael jackso2, #rickcastellano is a bully.",
        )
        for text in samples:
            log.info(text)
            log.info(self.learner.predict(text))
def train(self, df_path, data_root, output_dir, weights=False, col_image='image_path', col_label='label', col_group=None):
    """Train a 3D ResNet-18 classifier and export it as a traced TorchScript file.

    The CSV at ``df_path`` is split by its ``dataset`` column into
    'train' / 'valid' / 'test' rows, each wrapped in a ``VolumeDataset``.
    The model is trained with two one-cycle runs (``self._lr`` and then
    ``self._lr / 10``), each saving the checkpoint with the best validation
    loss as 'best'. Finally the single-device model is traced and written to
    ``output_dir / 'scripted_model.zip'``.

    Args:
        df_path: CSV file with at least ``label`` and ``dataset`` columns.
        data_root: Root directory handed to ``VolumeDataset``.
        output_dir: Output directory (created if missing); also used as the
            Learner ``path``.
        weights: Class weights for the cross-entropy loss.
            ``False`` -> uniform weights; ``True`` -> inverse class frequency
            of the training set, scaled so the smallest weight is 1;
            a comma-separated string or a list/tuple -> explicit weights.
        col_image, col_label, col_group: NOTE(review): currently unused --
            the label column is read as ``df['label']`` and the column names
            are not forwarded to ``VolumeDataset``. Confirm intended use.

    Returns:
        None.
    """
    import matplotlib
    matplotlib.use('Agg')  # non-interactive backend, selected before fastai is imported
    from fastai.vision import Learner
    from fastai.vision import accuracy
    from fastai.vision import DataBunch
    from fastai.callbacks import SaveModelCallback

    data_root = Path(data_root)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    model_name = 'scripted_model.zip'

    df = pd.read_csv(df_path)
    num_classes = df['label'].nunique()
    df_train = df[df.dataset.isin(['train'])]
    df_valid = df[df.dataset.isin(['valid'])]
    df_test = df[df.dataset.isin(['test'])]

    sample_size = self._sample_size
    sample_duration = self._sample_duration
    input_channels = self._input_channels
    num_workers = self._num_workers
    batch_size = self._batch_size
    n_epoch = self._n_epoch
    devices = self._devices

    # An empty device list used to crash on devices[0]; fall back to CUDA
    # when it is available and to the CPU otherwise.
    if devices:
        primary_device = devices[0]
    else:
        primary_device = 'cuda' if torch.cuda.is_available() else 'cpu'

    if str(primary_device).lower() != 'cpu':
        pin_memory = True
        device_data = primary_device
    else:
        pin_memory = False
        # NOTE(review): None lets DataBunch.create choose its own default
        # device, which may not be the CPU -- kept as in the original.
        device_data = None

    from vol_dataset import VolumeDataset
    ds_train = VolumeDataset(df_train, data_root, input_channels)
    ds_valid = VolumeDataset(df_valid, data_root, input_channels)
    ds_test = VolumeDataset(df_test, data_root, input_channels)
    data = DataBunch.create(ds_train, ds_valid, test_ds=ds_test, bs=batch_size,
                            num_workers=num_workers, device=device_data,
                            pin_memory=pin_memory)
    print(df_train.shape, df_valid.shape, df_test.shape)

    from resnet3d import r3d_18 as resnet18
    model = resnet18(input_channels=input_channels, num_classes=num_classes)
    # Keep a handle on the bare module: it is what gets traced at the end.
    model_single = model
    if len(devices) >= 2:
        model = nn.DataParallel(model_single, device_ids=devices)

    if isinstance(weights, bool):
        if weights:
            # sort_index() aligns the weights with the class indices;
            # value_counts(sort=False) alone does not guarantee that order.
            inv_freq = 1 / ds_train.label.value_counts(sort=False).sort_index()
            weights = inv_freq.values / inv_freq.min()
        else:
            # One weight per class instead of a hard-coded three.
            weights = [1.0] * num_classes
    elif isinstance(weights, str):
        weights = [float(w) for w in weights.split(',')]
    # list / tuple weights are used as given.

    # float32 matches the logits; int64 / float64 weights are rejected by
    # the cross-entropy kernel.
    weights = torch.tensor(weights, dtype=torch.float32)
    loss_func = nn.CrossEntropyLoss(weight=weights)
    loss_func = loss_func.to(primary_device)

    metrics = [accuracy]
    metrics += [AUCk(num_classes - 1)]
    learn = Learner(data, model, metrics=metrics, wd=1e-2, path=output_dir,
                    loss_func=loss_func)

    # Two one-cycle runs: the base learning rate, then a tenth of it. Each
    # run gets a fresh callback that saves the best valid_loss model as 'best'.
    for phase_lr in (self._lr, self._lr / 10):
        learn.fit_one_cycle(
            n_epoch, slice(phase_lr),
            callbacks=[SaveModelCallback(learn, every='improvement',
                                         monitor='valid_loss', name='best')])

    x_sample = torch.rand((2, input_channels, sample_duration, sample_size, sample_size))
    x_sample = x_sample.to(primary_device)
    # Trace in inference mode so layers such as batch norm / dropout are
    # recorded with their evaluation behaviour.
    model_single.eval()
    with torch.no_grad():
        model_scripted = torch.jit.trace(model_single, x_sample)
    model_scripted.to('cpu')
    model_scripted.save(str(output_dir / model_name))