def train_from_scratch():
    """Train a BERT-style model with an LM head and a next-sentence head.

    Every setting is hard-coded in the function body. The language model is
    created with ``LanguageModel.from_scratch``, the data is read from
    ``data/lm_finetune_nips`` through a ``StreamingDataSilo``, and after
    training both the model and the processor are written to
    ``saved_models/train_from_scratch``.
    """
    logging.basicConfig(
        level=logging.INFO,
        datefmt="%m/%d/%Y %H:%M:%S",
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    )

    tracker = MLFlowLogger(tracking_uri="")
    tracker.init_experiment(experiment_name="from_scratch", run_name="debug")

    # ---------------------------- Settings ----------------------------
    set_all_seeds(seed=39)
    device, n_gpu = initialize_device_settings(use_cuda=True)

    # Output locations
    output_dir = Path("saved_models/train_from_scratch")
    checkpoint_dir = output_dir / "checkpoints"

    # Input data
    corpus_dir = Path("data/lm_finetune_nips")
    train_file = "train.txt"
    dev_file = "dev.txt"

    # Model / optimisation hyperparameters
    vocab_size = 30522
    max_seq_len = 64
    batch_size = 16  # (probably only possible via gradient accumulation steps)
    grad_acc = 8  # used by both the optimizer setup and the trainer
    lr = 1e-4
    warmup = 0.05
    epochs = 10
    eval_interval = 5000

    # 1. Create a tokenizer
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

    # 2. Create a DataProcessor that converts raw text into a PyTorch Dataset
    processor = BertStyleLMProcessor(
        data_dir=corpus_dir,
        tokenizer=tokenizer,
        max_seq_len=max_seq_len,
        train_filename=train_file,
        dev_filename=dev_file,
        test_filename=None,
    )

    # 3. Create a DataSilo that provides the DataLoaders for the datasets
    silo = StreamingDataSilo(processor=processor, batch_size=batch_size)

    # 4. Create an AdaptiveModel:
    #    a) a language model created via LanguageModel.from_scratch
    bert = LanguageModel.from_scratch("bert", vocab_size)
    #    b) *two* prediction heads on top: masked LM and next sentence
    heads = [
        BertLMHead(768, vocab_size),
        NextSentenceHead([768, 2], task_name="nextsentence"),
    ]
    model = AdaptiveModel(
        language_model=bert,
        prediction_heads=heads,
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token", "per_sequence"],
        device=device,
    )

    # 5. Create an optimizer
    schedule = {"name": "LinearWarmup", "warmup_proportion": warmup}
    train_batches = len(silo.get_data_loader("train"))
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=lr,
        schedule_opts=schedule,
        n_batches=train_batches,
        n_epochs=epochs,
        device=device,
        grad_acc_steps=grad_acc,
    )

    # 6. Feed everything to the Trainer, which trains the model and
    #    evaluates it from time to time
    trainer = Trainer.create_or_load_checkpoint(
        model=model,
        optimizer=optimizer,
        data_silo=silo,
        epochs=epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=eval_interval,
        device=device,
        grad_acc_steps=grad_acc,
        checkpoint_root_dir=checkpoint_dir,
    )

    # 7. Let it grow!
    trainer.train()

    # 8. Hooray! You have a model. Store it:
    model.save(output_dir)
    processor.save(output_dir)
def train_from_scratch():
    """Train a BERT-style model with optional DistributedDataParallel.

    ``parse_arguments()`` supplies ``local_rank``; a value of ``-1`` means
    single-process training, any other value enables the distributed code
    paths. All remaining settings are hard-coded in the function body.

    The training file is shuffled and split into ``data/split_files`` before
    the ``StreamingDataSilo`` is built. In a distributed run with an
    initialised process group only rank 0 performs the split and the other
    ranks wait at a barrier, so no rank reads files that another rank is still
    writing. After training, the model and processor are saved to
    ``saved_models/train_from_scratch`` and the process group is destroyed
    when running distributed.
    """
    args = parse_arguments()
    use_amp = "O2"  # using "O2" here allows roughly 30% larger batch_sizes and 45% speed up

    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )

    distributed = args.local_rank != -1
    is_main_process = args.local_rank in [-1, 0]

    # Only the main process should log here
    if is_main_process:
        ml_logger = MLFlowLogger(
            tracking_uri="https://public-mlflow.deepset.ai/")
        ml_logger.init_experiment(experiment_name="train_from_scratch",
                                  run_name="run")

    set_all_seeds(seed=39)
    device, n_gpu = initialize_device_settings(use_cuda=True,
                                               local_rank=args.local_rank,
                                               use_amp=use_amp)

    save_dir = Path("saved_models/train_from_scratch")
    data_dir = Path("data/test")
    split_dir = Path("data/split_files")

    # Option A) just using a single file
    # train_filename = "train.txt"

    # Option B) (recommended when using StreamingDataSilo):
    # split and shuffle that file to have random order within and across epochs
    if distributed and torch.distributed.is_initialized():
        # Only one process may rewrite the split files; the others wait until
        # the files are complete before reading them.
        if args.local_rank == 0:
            randomize_and_split_file(data_dir / "train.txt",
                                     output_dir=split_dir,
                                     docs_per_file=1000)
        torch.distributed.barrier()
    else:
        # Single process (or no process group to synchronise on): split
        # locally, exactly as before.
        randomize_and_split_file(data_dir / "train.txt",
                                 output_dir=split_dir,
                                 docs_per_file=1000)
    train_filename = split_dir
    dev_filename = "dev.txt"

    max_seq_len = 128
    batch_size = 8  # if distributed: this is per_gpu
    grad_acc = 1
    learning_rate = 1e-4
    warmup_proportion = 0.05
    n_epochs = 2
    evaluate_every = 15000
    log_loss_every = 2
    checkpoint_every = 500
    checkpoint_root_dir = Path("checkpoints")
    checkpoints_to_keep = 4
    next_sent_pred_style = "bert-style"  # or "sentence"
    max_docs = None

    # Choose enough workers to queue sufficient batches during training.
    # Optimal number depends on your GPU speed, CPU speed and number of cores
    # 16 works well on a 4x V100 machine with 16 cores (AWS: p3.8xlarge).
    # For a single GPU you will need less.
    data_loader_workers = 1

    # 1. Create a tokenizer
    tokenizer = Tokenizer.load("bert-base-uncased", do_lower_case=True)

    # 2. Create a DataProcessor that handles all the conversion from raw text
    #    into a PyTorch Dataset
    processor = BertStyleLMProcessor(data_dir=data_dir,
                                     tokenizer=tokenizer,
                                     max_seq_len=max_seq_len,
                                     train_filename=train_filename,
                                     dev_filename=dev_filename,
                                     test_filename=None,
                                     next_sent_pred_style=next_sent_pred_style,
                                     max_docs=max_docs)

    # 3. Create a DataSilo that provides the DataLoaders for the datasets
    stream_data_silo = StreamingDataSilo(
        processor=processor,
        batch_size=batch_size,
        distributed=distributed,
        dataloader_workers=data_loader_workers)

    # 4. Create an AdaptiveModel
    # a) a language model created via LanguageModel.from_scratch
    language_model = LanguageModel.from_scratch("bert", tokenizer.vocab_size)

    # b) and *two* prediction heads on top: masked LM and next sentence
    lm_prediction_head = BertLMHead(768, tokenizer.vocab_size)
    next_sentence_head = NextSentenceHead(num_labels=2,
                                          task_name="nextsentence")

    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[lm_prediction_head, next_sentence_head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token", "per_sequence"],
        device=device,
    )

    # 5. Create an optimizer
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=learning_rate,
        schedule_opts={
            "name": "LinearWarmup",
            "warmup_proportion": warmup_proportion
        },
        n_batches=len(stream_data_silo.get_data_loader("train")),
        n_epochs=n_epochs,
        device=device,
        grad_acc_steps=grad_acc,
        distributed=distributed,
        use_amp=use_amp,
        local_rank=args.local_rank)

    # 6. Feed everything to the Trainer, which trains the model and evaluates
    #    it from time to time
    trainer = Trainer.create_or_load_checkpoint(
        model=model,
        optimizer=optimizer,
        data_silo=stream_data_silo,
        epochs=n_epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=evaluate_every,
        log_loss_every=log_loss_every,
        device=device,
        grad_acc_steps=grad_acc,
        local_rank=args.local_rank,
        checkpoint_every=checkpoint_every,
        checkpoint_root_dir=checkpoint_root_dir,
        checkpoints_to_keep=checkpoints_to_keep,
        use_amp=use_amp)

    # 7. Let it grow! Watch the tracked metrics live on the public mlflow
    #    server: https://public-mlflow.deepset.ai
    trainer.train()

    # 8. Hooray! You have a model. Store it:
    model.save(save_dir)
    processor.save(save_dir)

    if distributed:
        torch.distributed.destroy_process_group()
def train_from_scratch():
    """Train a BERT-style model in distributed mode with amp level "O2".

    ``parse_arguments()`` supplies ``local_rank`` (needed for DDP). The
    tokenizer vocabulary and the training file are read from
    ``data/lm_finetune_nips``; no dev or test file is used. Checkpointing is
    disabled (``checkpoint_every`` and ``checkpoint_root_dir`` are ``None``).

    After training, the main process (``local_rank`` of ``-1`` or ``0``)
    stores the model and the processor in ``saved_models/train_from_scratch``
    so that the training result is not lost.
    """
    # We need the local rank argument for DDP
    args = parse_arguments()
    use_amp = "O2"

    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )

    ml_logger = MLFlowLogger(tracking_uri="")
    ml_logger.init_experiment(experiment_name="train_from_scratch",
                              run_name="run")

    set_all_seeds(seed=39)
    device, n_gpu = initialize_device_settings(use_cuda=True,
                                               local_rank=args.local_rank,
                                               use_amp=use_amp)

    evaluate_every = 10000
    save_dir = Path("saved_models/train_from_scratch")
    data_dir = Path("data/lm_finetune_nips")
    train_filename = "train.txt"

    max_seq_len = 128
    batch_size = 80
    grad_acc = 3
    learning_rate = 0.0001
    warmup_proportion = 0.01
    n_epochs = 5
    vocab_file = "bert-base-uncased-vocab.txt"

    # 1. Create a tokenizer from the local vocabulary file
    tokenizer = BertTokenizer(data_dir / vocab_file, do_lower_case=True)

    # 2. Create a DataProcessor that handles all the conversion from raw text
    #    into a PyTorch Dataset
    processor = BertStyleLMProcessor(data_dir=data_dir,
                                     tokenizer=tokenizer,
                                     max_seq_len=max_seq_len,
                                     train_filename=train_filename,
                                     dev_filename=None,
                                     test_filename=None)

    # 3. Create a DataSilo that provides the DataLoaders for the datasets
    stream_data_silo = StreamingDataSilo(processor=processor,
                                         batch_size=batch_size,
                                         distributed=True,
                                         dataloader_workers=16)

    # 4. Create an AdaptiveModel
    # a) a language model created via LanguageModel.from_scratch
    language_model = LanguageModel.from_scratch("bert", tokenizer.vocab_size)

    # b) and *two* prediction heads on top: masked LM and next sentence
    lm_prediction_head = BertLMHead(768, tokenizer.vocab_size)
    next_sentence_head = NextSentenceHead([768, 2], task_name="nextsentence")

    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[lm_prediction_head, next_sentence_head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token", "per_sequence"],
        device=device,
    )

    # 5. Create an optimizer
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=learning_rate,
        schedule_opts={
            "name": "LinearWarmup",
            "warmup_proportion": warmup_proportion
        },
        n_batches=len(stream_data_silo.get_data_loader("train")),
        n_epochs=n_epochs,
        device=device,
        grad_acc_steps=grad_acc,
        distributed=True,
        use_amp=use_amp,
        local_rank=args.local_rank)

    # 6. Feed everything to the Trainer, which trains the model and evaluates
    #    it from time to time. Checkpointing is switched off in this example.
    checkpoint_every = None
    checkpoint_root_dir = None

    trainer = Trainer.create_or_load_checkpoint(
        model=model,
        optimizer=optimizer,
        data_silo=stream_data_silo,
        epochs=n_epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=evaluate_every,
        device=device,
        grad_acc_steps=grad_acc,
        checkpoint_every=checkpoint_every,
        checkpoint_root_dir=checkpoint_root_dir,
        use_amp=use_amp,
    )

    # 7. Let it grow!
    trainer.train()

    # 8. Store the result. Without this step (and with checkpointing off) the
    #    trained weights would be discarded when the function returns. Only
    #    the main process writes, so ranks do not overwrite each other's files.
    if args.local_rank in [-1, 0]:
        model.save(save_dir)
        processor.save(save_dir)
def train_from_scratch(args):
    """Train a BERT-style model in distributed mode from a hyperparameter dict.

    Args:
        args: Mutable mapping of hyperparameters. Numeric values are converted
            with ``int()`` / ``float()`` before use, so string values are
            accepted. Required keys: ``per_gpu_batch_size``,
            ``gradient_accumulation_steps``, ``train_file``, ``vocab_file``,
            ``do_lower_case``, ``max_seq_len``, ``learning_rate``,
            ``warmup_proportion``, ``n_epochs`` and ``evaluate_every``.
            Optional keys: ``next_sent_task``, ``use_amp``, ``dev_file``,
            ``test_file``, ``next_sent_pred_style``, ``max_docs``,
            ``data_loader_workers``, ``checkpoint_every``, ``log_loss_every``,
            ``log_learning_rate`` and ``checkpoints_to_keep``.
            The mapping is modified in place: ``local_rank`` is set from the
            command line and ``train_file`` is replaced by the directory of
            split files.

    Data is read from ``/opt/ml/input/data/input_channel``; the trained model
    and processor are saved to ``/opt/ml/model``.
    """
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )

    # TODO prettify this loading of params from two sources (cmd + json)
    cmd_args = parse_arguments()
    args["local_rank"] = cmd_args.local_rank
    logging.info(f'local_rank: {args["local_rank"]}')

    next_sent_task = bool(int(args.get("next_sent_task", 1)))
    distributed = True
    use_amp = args.get("use_amp", None)
    use_amp = None if use_amp == "" else use_amp

    # max_docs gets the same int conversion as the other numeric
    # hyperparameters; a missing or empty value means "no limit" (None).
    max_docs = args.get("max_docs", None)
    max_docs = None if max_docs in (None, "") else int(max_docs)

    is_main_process = args["local_rank"] in [-1, 0]

    # Only the main process should log here
    if is_main_process:
        ml_logger = StdoutLogger(tracking_uri=None)
        ml_logger.init_experiment(experiment_name="train_from_scratch",
                                  run_name="run")

    set_all_seeds(seed=39)
    device, n_gpu = initialize_device_settings(use_cuda=True,
                                               local_rank=args["local_rank"],
                                               use_amp=use_amp)

    effective_batch_size = int(args["per_gpu_batch_size"]) * int(
        args["gradient_accumulation_steps"]
    ) * torch.distributed.get_world_size()

    logging.info(
        f'Training with effective batch size of {effective_batch_size} '
        f'(per_gpu_batch_size = {int(args["per_gpu_batch_size"])}, gradient_accumulation_steps={int(args["gradient_accumulation_steps"])}, n_gpus = {torch.distributed.get_world_size()} )'
    )

    save_dir = Path("/opt/ml/model")
    data_dir = Path("/opt/ml/input/data/input_channel")

    # Split and shuffle training data (main process only)
    if is_main_process:
        randomize_and_split_file(data_dir / args["train_file"],
                                 output_dir=data_dir / "split_files")
    # let other processes wait for the split files from rank 0
    torch.distributed.barrier()
    args["train_file"] = data_dir / "split_files"

    # 1. Create a tokenizer
    tokenizer = BertTokenizer(data_dir / args["vocab_file"],
                              do_lower_case=bool(int(args["do_lower_case"])))

    # 2. Create a DataProcessor that handles all the conversion from raw text
    #    into a PyTorch Dataset
    processor = BertStyleLMProcessor(data_dir=data_dir,
                                     tokenizer=tokenizer,
                                     max_seq_len=int(args["max_seq_len"]),
                                     train_filename=args.get("train_file"),
                                     dev_filename=args.get("dev_file", None),
                                     test_filename=args.get("test_file", None),
                                     next_sent_pred_style=args.get(
                                         "next_sent_pred_style", "bert-style"),
                                     max_docs=max_docs,
                                     next_sent_pred=next_sent_task)

    # 3. Create a DataSilo that loads several datasets (train/dev/test) and
    #    provides DataLoaders for them
    data_silo = StreamingDataSilo(processor=processor,
                                  batch_size=int(args["per_gpu_batch_size"]),
                                  dataloader_workers=int(
                                      args.get("data_loader_workers", 8)),
                                  distributed=distributed)

    # 4. Create an AdaptiveModel
    # a) a language model created via LanguageModel.from_scratch
    language_model = LanguageModel.from_scratch("bert", tokenizer.vocab_size)

    # b) a masked-LM head, plus a next-sentence head when that task is enabled
    prediction_heads = [BertLMHead(768, tokenizer.vocab_size)]
    lm_output_types = ["per_token"]
    if next_sent_task:
        prediction_heads.append(
            NextSentenceHead(num_labels=2, task_name="nextsentence"))
        lm_output_types.append("per_sequence")

    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=prediction_heads,
        embeds_dropout_prob=0.1,
        lm_output_types=lm_output_types,
        device=device,
    )

    # 5. Create an optimizer
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=float(args["learning_rate"]),
        schedule_opts={
            "name": "LinearWarmup",
            "warmup_proportion": float(args["warmup_proportion"])
        },
        n_batches=len(data_silo.get_data_loader("train")),
        n_epochs=int(args["n_epochs"]),
        device=device,
        grad_acc_steps=int(args["gradient_accumulation_steps"]),
        distributed=distributed,
        use_amp=use_amp,
        local_rank=args["local_rank"])

    # 6. Feed everything to the Trainer, which trains the model and evaluates
    #    it from time to time. Checkpointing is enabled only when
    #    "checkpoint_every" is given.
    if args.get("checkpoint_every"):
        checkpoint_every = int(args["checkpoint_every"])
        checkpoint_root_dir = Path("/opt/ml/checkpoints/training")
    else:
        checkpoint_every = None
        checkpoint_root_dir = None

    trainer = Trainer.create_or_load_checkpoint(
        model=model,
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=int(args["n_epochs"]),
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=int(args["evaluate_every"]),
        log_loss_every=int(args.get("log_loss_every", 500)),
        log_learning_rate=bool(int(args.get("log_learning_rate", 0))),
        device=device,
        local_rank=args["local_rank"],
        grad_acc_steps=int(args["gradient_accumulation_steps"]),
        checkpoint_every=checkpoint_every,
        checkpoint_root_dir=checkpoint_root_dir,
        checkpoints_to_keep=int(args.get("checkpoints_to_keep", 10)),
        disable_tqdm=True,
        use_amp=use_amp,
    )

    # 7. Let it grow!
    trainer.train()

    # 8. Hooray! You have a model. Store it:
    model.save(save_dir)
    processor.save(save_dir)
def text_pair_classification():
    """Continue training a saved text-pair classifier and run a sample inference.

    The model is loaded from ``saved_models/text_pair_classification_model``,
    trained on tab-separated train/dev/test files with early stopping on the
    dev loss (best model stored in
    ``saved_models/text_pair_classification_model_Tuned``), and finally saved
    back to ``saved_models/text_pair_classification_model``. Two example text
    pairs are then passed through an ``Inferencer`` loaded from that directory
    and the result is printed.

    All settings are hard-coded; adapt ``data_dir`` and the example texts to
    your own dataset.
    """
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO)

    ml_logger = MLFlowLogger(tracking_uri="https://public-mlflow.deepset.ai/")
    ml_logger.init_experiment(experiment_name="Public_FARM",
                              run_name="Run_text_pair_classification")

    ##########################
    ########## Settings ######
    ##########################
    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=True)
    n_epochs = 2
    batch_size = 32
    evaluate_every = 500
    lang_model = "bert-base-cased"
    label_list = ["0", "1"]

    # 1. Create a tokenizer
    tokenizer = Tokenizer.load(pretrained_model_name_or_path=lang_model,
                               do_lower_case=False)

    # 2. Create a DataProcessor that handles all the conversion from raw text
    #    into a PyTorch Dataset.
    #    No sample dataset is referenced here: add your own dataset to run
    #    the example.
    processor = TextPairClassificationProcessor(
        tokenizer=tokenizer,
        label_list=label_list,
        metric="acc",
        label_column_name="label",
        max_seq_len=64,
        train_filename="train.tsv",
        test_filename="test.tsv",
        dev_filename="dev.tsv",
        data_dir=Path("/mnt/data/datasets/patents/patent_matching"),
        tasks={"text_classification"},
        delimiter="\t")

    # 3. Create a DataSilo that provides the DataLoaders for the datasets.
    #    (An earlier version of this example used a non-streaming DataSilo.)
    data_silo = StreamingDataSilo(processor=processor, batch_size=batch_size)

    # 4. Load the previously saved AdaptiveModel to continue training it.
    #    A fresh language model / prediction head is not built here because
    #    the loaded model replaces them anyway.
    model = AdaptiveModel.load("saved_models/text_pair_classification_model",
                               device=device)

    # 5. Create an optimizer. The number of batches is taken from the train
    #    data loader so the schedule matches the data that is actually used.
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=5e-6,
        device=device,
        n_batches=len(data_silo.get_data_loader("train")),
        n_epochs=n_epochs)

    # An early stopping instance can be used to save the model that performs
    # best on the dev set according to some metric and stop training when no
    # improvement is happening for some iterations.
    earlystopping = EarlyStopping(
        metric="loss",
        mode="min",  # use loss from the dev evaluator of the trainer
        save_dir=Path("saved_models/text_pair_classification_model_Tuned"
                      ),  # where to save the best model
        patience=2  # number of evaluations to wait for improvement before terminating the training
    )

    # 6. Feed everything to the Trainer, which trains the model and evaluates
    #    it from time to time
    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      data_silo=data_silo,
                      epochs=n_epochs,
                      n_gpu=n_gpu,
                      lr_schedule=lr_schedule,
                      evaluate_every=evaluate_every,
                      device=device,
                      early_stopping=earlystopping)

    # 7. Let it grow
    trainer.train()

    # 8. Hooray! You have a model. Store it:
    save_dir = Path("saved_models/text_pair_classification_model")
    model.save(save_dir)
    processor.save(save_dir)

    # 9. Load it & harvest your fruits (Inference)
    #    Add your own text adapted to the dataset you provide.
    #    NOTE: the values below are built from adjacent string literals, which
    #    Python concatenates; the pieces are kept exactly as in the original.
    basic_texts = [
        {
            "text":
            "<claim-text>The method of claim 10, wherein the indium metal layer is 10 nm to 100 µm thick.</claim-text>",
            "text_b":
            "<p id=" "p0001" " num=" "0001"
            ">The present invention is directed to metal plating compositions and methods. More specifically, the present invention is directed to metal plating compositions and methods which provide improved leveling and throwing power.</p <p id="
            "p0039" " num=" "0039"
            ">One or more conventional surfactants may be used. Typically, surfactants include, but are not limited to, nonionic surfactants such as alkyl phenoxy polyethoxyethanols. Other suitable surfactants containing multiple oxyethylene groups also may be used. Such surfactants include compounds of polyoxyethylene polymers having from as many as 20 to 150 repeating units. Such compounds also may perform as suppressors. Also included in the class of polymers are both block and random copolymers of polyoxyethylene (EO) and polyoxypropylene (PO). Surfactants may be added in conventional amounts, such as from 0.05 g/L to 20 g/L or such as from 0.5 g/L to 5 g/L.</p <p id="
            "p0040" " num=" "0040"
            ">Conventional levelers include, but are not limited to, one or more of alkylated polyalkyleneimines and organic sulfo sulfonates. Examples of such compounds include, 4-mercaptopyridine, 2-mercaptothiazoline, ethylene thiourea, thiourea, 1-(2-hydroxyethyl)-2-imidazolidinethion (HIT) and alkylated polyalkyleneimines. Such levelers are included in conventional amounts. Typically, such levelers are included in amounts of 1ppb to 1 g/L, or such as from 10ppb to 500ppm.</p <p id="
            "p0042" " num=" "0042"
            ">Alkali metal salts which may be included in the plating compositions include, but are not limited to, sodium and potassium salts of halogens, such as chloride, fluoride and bromide. Typically chloride is used. Such alkali metal salts are used in conventional amounts.</p <p id="
            "p0053" " num=" "0053"
            ">The metal plating compositions may be used to plate a metal or metal alloy on a substrate by any method known in the art and literature. Typically, the metal or metal alloy is electroplated using conventional electroplating processes with conventional apparatus. A soluble or insoluble anode may be used with the electroplating compositions.</p <p id="
            "p0022" " num=" "0022"
            ">One or more sources of metal ions are included in metal plating compositions to plate metals. The one or more sources of metal ions provide metal ions which include, but are not limited to, copper, tin, nickel, gold, silver, palladium, platinum and indium. Alloys include, but are not limited to, binary and ternary alloys of the foregoing metals. Typically, metals chosen from copper, tin, nickel, gold, silver or indium are plated with the metal plating compositions. More typically, metals chosen from copper, tin, silver or indium are plated. Most typically, copper is plated.</p <p id="
            "p0030" " num=" "0030"
            ">Indium salts which may be used include, but are not limited to, one or more of indium salts of alkane sulfonic acids and aromatic sulfonic acids, such as methanesulfonic acid, ethanesulfonic acid, butane sulfonic acid, benzenesulfonic acid and toluenesulfonic acid, salts of sulfamic acid, sulfate salts, chloride and bromide salts of indium, nitrate salts, hydroxide salts, indium oxides, fluoroborate salts, indium salts of carboxylic acids, such as citric acid, acetoacetic acid, glyoxylic acid, pyruvic acid, glycolic acid, malonic acid, hydroxamic acid, iminodiacetic acid, salicylic acid, glyceric acid, succinic acid, malic acid, tartaric acid, hydroxybutyric acid, indium salts of amino acids, such as arginine, aspartic acid, asparagine, glutamic acid, glycine, glutamine, leucine, lysine, threonine, isoleucine, and valine.</p"
        },
        {
            "text":
            "<claim-text>A toner comprising: <claim-text>toner base particles; and</claim-text> <claim-text>an external additive,</claim-text> <claim-text>the toner base particles each comprising a binder resin and a colorant,</claim-text> <claim-text>wherein the external additive comprises coalesced particles,</claim-text> <claim-text>wherein the coalesced particles are each a non-spherical secondary particle in which primary particles are coalesced together, and</claim-text> <claim-text>wherein an index of a particle size distribution of the coalesced particles is expressed by the following Formula (1): <maths id="
            "math0004" " num=" "(formula (1)" "><math display=" "block"
            "><mfrac><msub><mi>Db</mi><mn>50</mn></msub><msub><mi>Db</mi><mn>10</mn></msub></mfrac><mo>≦</mo><mn>1.20</mn></math><img id="
            "ib0008" " file=" "imgb0008.tif" " wi=" "93" " he=" "21"
            " img-content=" "math" " img-format=" "tif"
            "/></maths><br/> where, in a distribution diagram in which particle diameters in nm of the coalesced particles are on a horizontal axis and cumulative percentages in % by number of the coalesced particles are on a vertical axis and in which the coalesced particles are accumulated from the coalesced particles having smaller particle diameters to the coalesced particles having larger particle diameters, Db<sub>50</sub> denotes a particle diameter of the coalesced particle at which the cumulative percentage is 50% by number, and Db<sub>10</sub> denotes a particle diameter of the coalesced particle at which the cumulative percentage is 10% by number.</claim-text></claim-text>",
            "text_b":
            "<p id=" "p0177" " num=" "0177"
            ">For a similar reason, it is preferred that the electroconductive fine powder has a volume-average particle size of 0.5 - 5 µm, more preferably 0.8 - 5 µm, further preferably 1.1 - 5 µm and has a particle size distribution such that particles of 0.5 µm or smaller occupy at most 70 % by volume and particles of 5.0 µm or larger occupy at most 5 % by number.</p <p id="
            "p0189" " num=" "0189"
            ">The volume-average particle size and particle size distribution of the electroconductive fine powder described herein are based on values measured in the following manner. A laser diffraction-type particle size distribution measurement apparatus ("
            "Model LS-230"
            ", available from Coulter Electronics Inc.) is equipped with a liquid module, and the measurement is performed in a particle size range of 0.04 - 2000 µm to obtain a volume-basis particle size distribution. For the measurement, a minor amount of surfactant is added to 10 cc of pure water and 10 mg of a sample electroconductive fine powder is added thereto, followed by 10 min. of dispersion by means of an ultrasonic disperser (ultrasonic homogenizer) to obtain a sample dispersion liquid, which is subjected to a single time of measurement for 90 sec.</p <p id="
            "p0191" " num=" "0191"
            ">In the case where the electroconductive fine powder is composed of agglomerate particles, the particle size of the electroconductive fine powder is determined as the particle size of the agglomerate. The electroconductive fine powder in the form of agglomerated secondary particles can be used as well as that in the form of primary particles. Regardless of its agglomerated form, the electroconductive fine powder can exhibit its desired function of charging promotion by presence in the form of the agglomerate in the charging section at the contact position<!-- EPO <DP n="
            "85"
            "> --> between the charging member and the image-bearing member or in a region in proximity thereto.</p"
        },
    ]

    # Use a separate name so the trained AdaptiveModel is not shadowed.
    inferencer = Inferencer.load(save_dir)
    result = inferencer.inference_from_dicts(dicts=basic_texts)
    print(result)
def train_from_scratch(args):
    """Train a BERT-style model in a single process from a hyperparameter dict.

    Args:
        args: Mapping of hyperparameters. Numeric values are converted with
            ``int()`` / ``float()`` before use, so string values are accepted.
            Required keys: ``evaluate_every``, ``vocab_file``,
            ``do_lower_case``, ``max_seq_len``, ``train_file``,
            ``batch_size``, ``learning_rate``, ``warmup_proportion``,
            ``n_epochs`` and ``gradient_accumulation_steps``.
            Optional keys: ``mlflow_tracking_uri``, ``dev_file``,
            ``test_file`` and ``checkpoint_every``.
            ``do_lower_case`` may be a bool/int or a string; the strings
            ``""``, ``"0"``, ``"false"`` and ``"no"`` (case-insensitive)
            disable lower-casing, any other string enables it.

    Data is read from ``/opt/ml/input/data/input_channel``; the trained model
    and processor are saved to ``/opt/ml/model``.
    """
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )

    ml_logger = MLFlowLogger(tracking_uri=args.get(
        "mlflow_tracking_uri", "file:/opt/ml/model/mlflow"))
    ml_logger.init_experiment(experiment_name="train_from_scratch",
                              run_name="run")

    set_all_seeds(seed=39)
    device, n_gpu = initialize_device_settings(use_cuda=True)
    evaluate_every = int(args["evaluate_every"])

    save_dir = Path("/opt/ml/model")
    data_dir = Path("/opt/ml/input/data/input_channel")

    # A raw string such as "0" or "false" is truthy, so a string value has to
    # be interpreted explicitly; otherwise lower-casing could never be
    # switched off through string hyperparameters.
    raw_lower_case = args["do_lower_case"]
    if isinstance(raw_lower_case, str):
        do_lower_case = raw_lower_case.strip().lower() not in (
            "", "0", "false", "no")
    else:
        do_lower_case = bool(raw_lower_case)

    # 1. Create a tokenizer
    tokenizer = BertTokenizer(data_dir / args["vocab_file"],
                              do_lower_case=do_lower_case)

    # 2. Create a DataProcessor that handles all the conversion from raw text
    #    into a PyTorch Dataset
    processor = BertStyleLMProcessor(
        data_dir=data_dir,
        tokenizer=tokenizer,
        max_seq_len=int(args["max_seq_len"]),
        train_filename=args["train_file"],
        dev_filename=args.get("dev_file", None),
        test_filename=args.get("test_file", None),
    )

    # 3. Create a DataSilo that provides the DataLoaders for the datasets
    stream_data_silo = StreamingDataSilo(processor=processor,
                                         batch_size=int(args["batch_size"]))

    # 4. Create an AdaptiveModel
    # a) a language model created via LanguageModel.from_scratch
    language_model = LanguageModel.from_scratch("bert", tokenizer.vocab_size)

    # b) and *two* prediction heads on top: masked LM and next sentence
    lm_prediction_head = BertLMHead(768, tokenizer.vocab_size)
    next_sentence_head = NextSentenceHead([768, 2], task_name="nextsentence")

    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[lm_prediction_head, next_sentence_head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token", "per_sequence"],
        device=device,
    )

    # 5. Create an optimizer
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=float(args["learning_rate"]),
        schedule_opts={
            "name": "LinearWarmup",
            "warmup_proportion": float(args["warmup_proportion"])
        },
        n_batches=len(stream_data_silo.get_data_loader("train")),
        n_epochs=int(args["n_epochs"]),
        device=device,
        grad_acc_steps=int(args["gradient_accumulation_steps"]),
    )

    # 6. Feed everything to the Trainer, which trains the model and evaluates
    #    it from time to time. Checkpointing is enabled only when
    #    "checkpoint_every" is given.
    if args.get("checkpoint_every"):
        checkpoint_every = int(args["checkpoint_every"])
        checkpoint_root_dir = Path("/opt/ml/checkpoints/training")
    else:
        checkpoint_every = None
        checkpoint_root_dir = None

    trainer = Trainer.create_or_load_checkpoint(
        model=model,
        optimizer=optimizer,
        data_silo=stream_data_silo,
        epochs=int(args["n_epochs"]),
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=evaluate_every,
        device=device,
        grad_acc_steps=int(args["gradient_accumulation_steps"]),
        checkpoint_every=checkpoint_every,
        checkpoint_root_dir=checkpoint_root_dir,
    )

    # 7. Let it grow!
    trainer.train()

    # 8. Hooray! You have a model. Store it:
    model.save(save_dir)
    processor.save(save_dir)