def get_commits(pr_number, owner='gisce', repository='erp'):
    # Pagination documentation: https://developer.github.com/v3/#pagination
    def parse_github_links_header(links_header):
        ret_links = {}
        full_links = links_header.split(',')
        for link in full_links:
            link_url, link_ref = link.split(';')
            link_url = link_url.strip()[1:-1]
            link_ref = link_ref.split('=')[-1].strip()[1:-1]
            ret_links[link_ref] = link_url
        return ret_links

    logger.info('Getting commits from GitHub')
    headers = {'Authorization': 'token %s' % github_config()['token']}
    repo = github_config(
        repository='{}/{}'.format(owner, repository))['repository']
    url = "https://api.github.com/repos/%s/pulls/%s/commits?per_page=100" \
          % (repo, pr_number)
    r = requests.get(url, headers=headers)
    commits = json.loads(r.text)
    if 'link' in r.headers:
        url_page = 1
        links = parse_github_links_header(r.headers['link'])
        while links['last'][-1] != str(url_page):
            url_page += 1
            tqdm.write(colors.yellow(
                ' - Getting extra commits page {}'.format(url_page)))
            r = requests.get(links['next'], headers=headers)
            commits += json.loads(r.text)
    return commits
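# Minimal usage sketch for get_commits() above (not from the original source): it
# assumes github_config() is already configured with a valid 'token', and the PR
# number is a placeholder. It only illustrates the returned payload, a list of
# commit dicts as documented by the GitHub REST API.
def print_pr_commit_subjects(pr_number):
    commits = get_commits(pr_number)
    for commit in commits:
        subject = commit['commit']['message'].splitlines()[0]
        tqdm.write('{} {}'.format(commit['sha'][:7], subject))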
def check_it_exists(src='/home/erp/src', repository='erp', sudo_user='******'):
    with settings(hide('everything'), sudo_user=sudo_user, warn_only=True):
        res = sudo("ls {}/{}".format(src, repository))
        if res.return_code:
            message = "The repository does not exist or cannot be found"
            tqdm.write(colors.red(message))
            abort(message)
def catch_result(self, result):
    for line in result.split('\n'):
        if re.match('Applying: ', line):
            tqdm.write(colors.green(line))
            self.pbar.update()
    if result.failed:
        if "git config --global user.email" in result:
            logger.error(
                "Need to configure git for this user\n"
            )
            raise GitHubException(result)
        try:
            raise WiggleException
        except WiggleException:
            if self.auto_exit:
                sudo("git am --abort")
                logger.error('Aborting deploy and go back')
                raise GitHubException
            prompt("Manual resolve...")
        finally:
            if not self.auto_exit:
                to_commit = sudo(
                    "git diff --cached --name-only --no-color", pty=False
                )
                if to_commit:
                    self.resolve()
                else:
                    self.skip()
def tprint(string):
    """Print string via `tqdm` so that it doesn't interfere with a progressbar."""
    try:
        tqdm.write(string)
    except Exception:
        print(string)
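# Minimal sketch showing tprint() in a loop: routing output through tqdm.write keeps
# messages above an active progress bar instead of breaking it. The loop body is
# illustrative only and not part of the original code.
from tqdm import tqdm

def demo_tprint():
    for item in tqdm(range(100), desc="processing"):
        if item % 25 == 0:
            tprint("reached item {}".format(item))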
def _print_epoch_means(self):
    last_val_accs = np.array(self.validation_accuracies)
    v_mean = np.mean(last_val_accs[-801:-1])
    last_train_accs = np.array(self.train_accuracies)
    t_mean = np.mean(last_train_accs[-801:-1])
    # tqdm.write('EPOCH %d:' % (self.epoch))
    tqdm.write('training => %.5f / val => %.5f' % (t_mean, v_mean))
def tpv2tan_hdr(img, ota):
    image = odi.reprojpath + 'reproj_' + ota + '.' + img.stem()
    # change the CTYPENs to be TANs if they aren't already
    tqdm.write('TPV -> TAN in {:s}'.format(image))
    iraf.imutil.hedit.setParam('images', image)
    iraf.imutil.hedit.setParam('fields', 'CTYPE1')
    iraf.imutil.hedit.setParam('value', 'RA---TAN')
    iraf.imutil.hedit.setParam('add', 'yes')
    iraf.imutil.hedit.setParam('addonly', 'no')
    iraf.imutil.hedit.setParam('verify', 'no')
    iraf.imutil.hedit.setParam('update', 'yes')
    iraf.imutil.hedit(show='no', mode='h')
    iraf.imutil.hedit.setParam('images', image)
    iraf.imutil.hedit.setParam('fields', 'CTYPE2')
    iraf.imutil.hedit.setParam('value', 'DEC--TAN')
    iraf.imutil.hedit.setParam('add', 'yes')
    iraf.imutil.hedit.setParam('addonly', 'no')
    iraf.imutil.hedit.setParam('verify', 'no')
    iraf.imutil.hedit.setParam('update', 'yes')
    iraf.imutil.hedit(show='no', mode='h')
    # delete any PV keywords
    # leaving them in will give you trouble with the img wcs
    iraf.unlearn(iraf.imutil.hedit)
    iraf.imutil.hedit.setParam('images', image)
    iraf.imutil.hedit.setParam('fields', 'PV*')
    iraf.imutil.hedit.setParam('delete', 'yes')
    iraf.imutil.hedit.setParam('verify', 'no')
    iraf.imutil.hedit.setParam('update', 'yes')
    iraf.imutil.hedit(show='no', mode='h')
def resync_invoiceitems(apps, schema_editor):
    """
    Since invoiceitem IDs were not previously stored (the ``stripe_id`` field held
    the id of the linked subscription), a direct migration will leave us with a
    bunch of orphaned objects. It was decided
    [here](https://github.com/kavdev/dj-stripe/issues/162) that a purge and re-sync
    would be the best option for subscriptions. That's being extended to
    InvoiceItems.

    No data that is currently available on stripe will be deleted. Anything stored
    locally will be purged.
    """
    # This is okay, since we're only doing a forward migration.
    from djstripe.models import InvoiceItem
    from djstripe.context_managers import stripe_temporary_api_version

    with stripe_temporary_api_version("2016-03-07"):
        if InvoiceItem.objects.count():
            print("Purging invoiceitems. Don't worry, all invoiceitems will be "
                  "re-synced from stripe. Just in case you didn't get the memo, "
                  "we'll print out a json representation of each object for your "
                  "records:")
            print(serializers.serialize("json", InvoiceItem.objects.all()))
            InvoiceItem.objects.all().delete()

        print("Re-syncing invoiceitems. This may take a while.")

        for stripe_invoiceitem in tqdm(iterable=InvoiceItem.api_list(), desc="Sync", unit=" invoiceitems"):
            invoice = InvoiceItem.sync_from_stripe_data(stripe_invoiceitem)

            if not invoice.customer:
                tqdm.write("The customer for this invoiceitem ({invoiceitem_id}) "
                           "does not exist locally (so we won't sync the "
                           "invoiceitem). You'll want to figure out how that "
                           "happened.".format(invoiceitem_id=stripe_invoiceitem['id']))

        print("InvoiceItem re-sync complete.")
def resync_subscriptions(apps, schema_editor):
    """
    Since subscription IDs were not previously stored, a direct migration will leave
    us with a bunch of orphaned objects. It was decided
    [here](https://github.com/kavdev/dj-stripe/issues/162) that a purge and re-sync
    would be the best option.

    No data that is currently available on stripe will be deleted. Anything stored
    locally will be purged.
    """
    # This is okay, since we're only doing a forward migration.
    from djstripe.models import Subscription
    from djstripe.context_managers import stripe_temporary_api_version

    with stripe_temporary_api_version("2016-03-07"):
        if Subscription.objects.count():
            print("Purging subscriptions. Don't worry, all active subscriptions will "
                  "be re-synced from stripe. Just in case you didn't get the memo, "
                  "we'll print out a json representation of each object for your "
                  "records:")
            print(serializers.serialize("json", Subscription.objects.all()))
            Subscription.objects.all().delete()

        print("Re-syncing subscriptions. This may take a while.")

        for stripe_subscription in tqdm(iterable=Subscription.api_list(), desc="Sync", unit=" subscriptions"):
            subscription = Subscription.sync_from_stripe_data(stripe_subscription)

            if not subscription.customer:
                tqdm.write("The customer for this subscription ({subscription_id}) "
                           "does not exist locally (so we won't sync the "
                           "subscription). You'll want to figure out how that "
                           "happened.".format(subscription_id=stripe_subscription['id']))

        print("Subscription re-sync complete.")
def check_trace(self, step_method):
    """Tests whether the trace for step methods is exactly the same as on master.

    Code changes that affect how random numbers are drawn may change this, and
    require `master_samples` to be updated, but such changes should be noted and
    justified in the commit.

    This method may also be used to benchmark step methods across commits, by
    running, for example

    ```
    BENCHMARK=100000 ./scripts/test.sh -s pymc3/tests/test_step.py:TestStepMethods
    ```

    on multiple commits.
    """
    test_steps = 100
    n_steps = int(os.getenv('BENCHMARK', 100))
    benchmarking = (n_steps != test_steps)
    if benchmarking:
        tqdm.write('Benchmarking {} with {:,d} samples'.format(step_method.__name__, n_steps))
    else:
        tqdm.write('Checking {} has same trace as on master'.format(step_method.__name__))
    with Model():
        Normal('x', mu=0, sd=1)
        trace = sample(n_steps, step=step_method(), random_seed=1)

    if not benchmarking:
        assert_array_almost_equal(trace.get_values('x'), self.master_samples[step_method])
def check_alignments(self, filename):
    """ If we have no alignments for this image, skip it """
    have_alignments = self.faces.have_face(filename)
    if not have_alignments:
        tqdm.write("No alignment found for {}, "
                   "skipping".format(os.path.basename(filename)))
    return have_alignments
def mal(mal_title, mal_id=False):
    cookies = {"incap_ses_224_81958": "P6tYbUr7VH9V6shgudAbA1g5FVYAAAAAyt7eDF9npLc6I7roc0UIEQ=="}
    response = requests.get(
        "http://myanimelist.net/api/anime/search.xml",
        params={'q': mal_title},
        cookies=cookies,
        auth=("zodman1", "zxczxc"),
        headers={'User-Agent': 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36'})
    content = response.content
    if mal_id is not False:
        for e in xpath.search(content, "//entry"):
            if mal_id in e:
                content = e
                break
    tqdm.write("%s %s" % ((mal_title,), mal_id))
    id = xpath.get(content, "//id")
    title = xpath.get(content, "//title")
    title_en = xpath.get(content, "//english")
    type_ = xpath.get(content, "//type")
    synonyms = xpath.get(content, "//synonyms")
    status = xpath.get(content, "//status")
    synopsys = translate(xpath.get(content, "//synopsis"), "es")
    img = xpath.get(content, "//image")
    episodes = xpath.get(content, "//episodes")
    resumen = synopsys.replace("<br />", " ").replace("\n\r", "")
    resumen = translate(resumen, 'es')
    status = translate(status, 'es')
    assert id != "", mal_title
    data = dict(title=title, title_en=title_en, type=type_, status=status,
                resumen=resumen, img=img, episodes=episodes,
                synonyms=synonyms, id=id, synopsys=synopsys)
    return MalResult(**data)
def handle_single(self, filename, verbosity, remove):
    tqdm.write("Work on {!r}".format(filename))
    basename = os.path.splitext(os.path.basename(filename))[0]
    tree = ET.parse(filename)
    root = tree.getroot()
    words = defaultdict(lambda: {'words': set(), 'fichas': list()})
    fichas = root.findall('./ficha')
    for ficha in tqdm(fichas, desc=basename, leave=False):
        lemma = ''.join(ficha.find('./lema').itertext()).strip()
        data = ImportSM.work_on_ficha(ficha)
        data = [it[0] for it in data]
        for it in data:
            try:
                w = Word.objects.get(word=it.encode('utf-8'))
                words[w.pk]['words'].add(it)
                words[w.pk]['fichas'].append((filename, ficha.attrib['ID'], ficha))
            except Word.DoesNotExist:
                tqdm.write("not found {}".format(lemma))

    # Detect duplicates!
    dupes = {k: v for k, v in words.items() if len(v['words']) > 1}
    if dupes:
        remove_msg = " (will be removed!)"
        tqdm.write("...found {} duplicates{}".format(len(dupes), remove_msg))
        for w, values in dupes.items():
            word = Word.objects.get(pk=w)
            tqdm.write(" - pk: {!r}: {}".format(w, word))
            for ficha in values['fichas']:
                tqdm.write("   + {}: [ID={!r}] {}".format(
                    ficha[0], ficha[1],
                    ''.join(ficha[2].find('./lema').itertext()).strip()))
            if remove:
                word.delete()
def check_alignments(self, frame):
    """ If we have no alignments for this image, skip it """
    have_alignments = self.alignments.frame_exists(frame)
    if not have_alignments:
        tqdm.write("No alignment found for {}, "
                   "skipping".format(frame))
    return have_alignments
def emit(self, record):
    try:
        msg = self.format(record)
        tqdm.write(msg, file=self.stream, end=self.terminator)
        self.flush()
    except Exception:  # pylint: disable=broad-except
        self.handleError(record)
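# Sketch of how an emit() like the one above is usually wired into the logging
# module, assuming it lives on a logging.StreamHandler subclass (named
# TqdmLoggingHandler here purely for illustration; that class name is not part of
# the snippet above).
import logging

def setup_tqdm_logging():
    log = logging.getLogger(__name__)
    handler = TqdmLoggingHandler()  # hypothetical subclass providing the emit() shown above
    handler.setFormatter(logging.Formatter("%(levelname)s %(message)s"))
    log.addHandler(handler)
    log.setLevel(logging.INFO)
    return log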
def get_exec_times(graph):
    # Get execution times for reports (-m option)
    basename, _ = os.path.splitext(os.path.basename(graph))
    reports = glob.glob("*" + basename + "*.csv")
    reports.sort(reverse=True, key=lambda f: os.path.getmtime(f))
    csvfile = reports[0]
    tqdm.write("Retrieving monitoring info from " + csvfile)
    return get_costs(csvfile)
def train(self, episodes=500, max_step=200):
    for episode in tqdm(range(episodes)):
        if episode % 50 == 0:
            total_reward = self._test_impl(max_step, delay=0, gui=False)
            tqdm.write('current reward: {total_reward}'.format(total_reward=total_reward))
        else:
            # train step
            self._train_impl(max_step)
def check_am_session(src='/home/erp/src', repository='erp', sudo_user='******'):
    with settings(hide('everything'), sudo_user=sudo_user, warn_only=True):
        with cd("{}/{}".format(src, repository)):
            res = sudo("ls .git/rebase-apply")
            if not res.return_code:
                message = "The repository is in the middle of an am session!"
                tqdm.write(colors.red(message))
                abort(message)
def check_is_rolling(src='/home/erp/src', repository='erp', sudo_user='******'):
    with settings(hide('everything'), sudo_user=sudo_user, warn_only=True):
        with cd("{}/{}".format(src, repository)):
            res = sudo("git branch | grep '* rolling'")
            if res.return_code:
                message = "The repository is not in rolling mode"
                tqdm.write(colors.red(message))
                abort(message)
def _saveSession(self, sess):
    """ Save the model parameters and the variables

    Args:
        sess: the current session
    """
    tqdm.write('Checkpoint reached: saving model (don\'t stop the run)...')
    self.saveModelParams()
    self.saver.save(sess, self._getModelName())  # TODO: Put a limit size (ex: 3GB for the modelDir)
    tqdm.write('Model saved.')
def _write(self, msg):
    """ Write error messages to the progress bar, if using one, otherwise to stderr """
    if self._progbar:
        tqdm.write(msg)
    else:
        print(msg, file=sys.stderr)
def _evaluate(self, sess, dataset_name, return_extras=False):
    y_pred = np.zeros(self.dataset.num_examples(dataset_name), dtype=np.int32)

    tqdm.write('Running evaluation for dataset %s' % dataset_name, file=sys.stderr)

    for step, dataset_chunk in self.dataset.traverse_dataset(dataset_name, self.batch_size):
        y_pred[step:min(step + self.batch_size, self.dataset.num_examples(dataset_name))] = \
            self.predict(sess, dataset_chunk)

    y_true = self.dataset.dataset_labels(dataset_name, self.cl_iteration)

    return self.get_metrics(y_true, y_pred, return_extras=return_extras)
def illumination_corrections(image_to_correct, correction_image, corrected_image, do_correction=True):
    # print image_to_correct, correction_image, corrected_image
    iraf.unlearn(iraf.imutil.imarith, iraf.imfilter.median)
    iraf.imutil.imarith.setParam('operand1', image_to_correct)
    iraf.imutil.imarith.setParam('op', '/')
    if do_correction:
        iraf.imutil.imarith.setParam('operand2', odi.skyflatpath + correction_image)
    else:
        tqdm.write('not applying illcor')
        iraf.imutil.imarith.setParam('operand2', 1.0)
    iraf.imutil.imarith.setParam('result', odi.illcorpath + corrected_image)
    iraf.imutil.imarith(mode='h')
def mainTrain(self, sess): """ Training loop Args: sess: The current running session """ # Specific training dependent loading self.textData.makeLighter(self.args.ratioDataset) # Limit the number of training samples mergedSummaries = tf.summary.merge_all() # Define the summary operator (Warning: Won't appear on the tensorboard graph) if self.globStep == 0: # Not restoring from previous run self.writer.add_graph(sess.graph) # First time only # If restoring a model, restore the progression bar ? and current batch ? print('Start training (press Ctrl+C to save and exit)...') try: # If the user exit while training, we still try to save the model for e in range(self.args.numEpochs): print() print("----- Epoch {}/{} ; (lr={}) -----".format(e+1, self.args.numEpochs, self.args.learningRate)) batches = self.textData.getBatches() # TODO: Also update learning parameters eventually tic = datetime.datetime.now() for nextBatch in tqdm(batches, desc="Training"): # Training pass ops, feedDict = self.model.step(nextBatch) assert len(ops) == 2 # training, loss _, loss, summary = sess.run(ops + (mergedSummaries,), feedDict) self.writer.add_summary(summary, self.globStep) self.globStep += 1 # Output training status if self.globStep % 100 == 0: perplexity = math.exp(float(loss)) if loss < 300 else float("inf") tqdm.write("----- Step %d -- Loss %.2f -- Perplexity %.2f" % (self.globStep, loss, perplexity)) # Checkpoint if self.globStep % self.args.saveEvery == 0: self._saveSession(sess) toc = datetime.datetime.now() print("Epoch finished in {}".format(toc-tic)) # Warning: Will overflow if an epoch takes more than 24 hours, and the output isn't really nicer except (KeyboardInterrupt, SystemExit): # If the user press Ctrl+C while testing progress print('Interruption detected, exiting the program...') self._saveSession(sess) # Ultimate saving before complete exit
def _saveSession(self, sess):
    """ Save the model parameters and the variables

    Args:
        sess: the current session
    """
    tqdm.write('Checkpoint reached: saving model (don\'t stop the run)...')
    self.saveModelParams()

    model_name = self._getModelName()
    with open(model_name, 'w') as f:  # HACK: Simulate the old model existance to avoid rewriting the file parser
        f.write('This file is used internally by DeepQA to check the model existance. Please do not remove.\n')

    self.saver.save(sess, model_name)  # TODO: Put a limit size (ex: 3GB for the modelDir)
    tqdm.write('Model saved.')
def handle(self, *args, **kwargs): results = [] for fansub, users in tqdm(NYAA_USERS.items()): f,_ = Fansub.objects.get_or_create(name=fansub) for user in users: offset = 1 while True: results_ = nyaa.search(user=user,offset=offset) if not results_: break offset +=1 results+=results_ for res in tqdm(results): flag_next = False for j in BYPASS: if j.lower() in res.title.lower(): flag_next = True break if flag_next: continue date = make_aware(res.date, is_dst=False) torrent,created = Torrent.objects.get_or_create(full=res.title, url=res.link.replace("download","view"), defaults=dict(download_url = res.link.replace("view","download"), date=date)) tqdm.write("%s %s " % (res.title, res.link )) full = res.title data = guessit.guessit(full, {"episode_prefer_number":True, 'expected_group':RELEASE_GROUPS}) title = data.get("title") kwargs_ = {} if title in MAL_ANIMES: search_title, mal_id = MAL_ANIMES[title] title = search_title kwargs_ = {'mal_id':mal_id} try: mal_data = mal(title,**kwargs_) except: continue anime,_ = Anime.objects.get_or_create(slug=slugify(data.get("title")), defaults={"title":data.get("title")}) release_group,_ = ReleaseGroup.objects.get_or_create(name=data.get("release_group")) mal_obj, _ = MALMeta.objects.get_or_create(mal_id=mal_data.id) mal_obj.title = mal_data.title mal_obj.image = mal_data.img mal_obj.synopsys = mal_data.synopsys mal_obj.resumen = mal_data.resumen mal_obj.synonyms = mal_data.synonyms mal_obj.title_en = mal_data.title_en mal_obj.status = mal_data.status mal_obj.save() meta,_ = MetaTorrent.objects.get_or_create( anime=anime, torrent=torrent, release_group=release_group, mal=mal_obj) meta.episode=data.get("episode", data.get("episode_title")) meta.format=data.get("format", data.get("screen_size")) meta.save()
def add_files(self, filenames):
    files = []
    # Look for these files in the index
    for filename in tqdm([os.path.abspath(x) for x in filenames], leave=False):
        tqdm.write("Indexing {}".format(filename))
        if filename in self._names:
            files.append(self._update_if_required(filename))
        else:
            logger.debug("Adding new file to index: {}".format(filename))
            entry = entry_for_file(filename)
            self._process_entries([entry])
            files.append(entry)
    return files
def dir_bruter(word_queue, target_url, stime, extensions=None, pbar=None):
    while not word_queue.empty():
        pbar.update(1)
        attempt = word_queue.get()
        attempt_list = []
        # Check whether there is a file extension; if not, it is a path we want to brute-force
        # if "." not in attempt:
        #     attempt_list.append("%s/" % attempt)
        # else:
        attempt_list.append("%s" % attempt)
        # If we also want to brute-force extensions
        if extensions:
            for extension in extensions:
                if extension == ".swp":
                    attempt_list.append("/.%s%s" % (attempt.strip('/'), extension))
                else:
                    attempt_list.append("%s%s" % (attempt, extension))
        # Iterate over the list of files we want to try
        for brute in attempt_list:
            url = "%s%s" % (target_url, urllib.quote(brute))
            # print url
            try:
                headers = {}
                headers["User-Agent"] = conf['ua']
                r = urllib2.Request(url, headers=headers)
                # pbar.update(1)
                try:
                    response = urllib2.urlopen(r, timeout=2)
                except:
                    logger.error("Time out...")
                    continue  # the request may otherwise hang
                # Sleep after the request completes
                time.sleep(stime)
                if response.code != 404:
                    logger.info("Get !!!!" + url)
                    tqdm.write("[%d] => %s" % (response.code, url))
            except urllib2.URLError, e:
                if hasattr(e, 'code') and e.code != 404:
                    tqdm.write("!!! %d => %s" % (e.code, url))
def emit(self, record):
    msg = self.format(record)

    # Handle logging on several lines
    msg = msg.replace(
        '\n', '\n_' + ' ' * (len(msg) - len(record.message) - 1))

    # Add color
    for reg, color in self.color_subst:
        msg = re.sub(reg, color + r'\1' + Fore.RESET + Style.RESET_ALL, msg)

    tqdm.write(msg)
def _crawl(self, url: URL, save: bool = True) -> Any:
    try:
        data = requests.get(url).json()
    except json.JSONDecodeError as err:
        tqdm.write(f"JSON decode failure: {url}")
        return None

    if save:
        out_data = json.dumps(data, indent=4, sort_keys=True)
        out_data = out_data.replace(str(self._src_url), "")
        file = self._dest_dir.joinpath((url / "index.json").path[1:])
        file.parent.mkdir(parents=True, exist_ok=True)
        file.write_text(out_data)

    return data
def sync_charges(apps, schema_editor):
    # This is okay, since we're only doing a forward migration.
    from djstripe.models import Charge
    from djstripe.context_managers import stripe_temporary_api_version

    with stripe_temporary_api_version("2016-03-07"):
        if Charge.objects.count():
            print("syncing charges. This may take a while.")

            for charge in tqdm(Charge.objects.all(), desc="Sync", unit=" charges"):
                try:
                    Charge.sync_from_stripe_data(charge.api_retrieve())
                except InvalidRequestError:
                    tqdm.write("There was an error while syncing charge ({charge_id}).".format(charge_id=charge.stripe_id))

            print("Charge sync complete.")
def _log_epoch(self, engine):
    self.pbar.refresh()
    tqdm.write("Epoch: {} - avg loss: {:.5f}".format(
        engine.state.epoch, self.running_loss / self.n_batches))
    self.running_loss = 0
    self.pbar.n = self.pbar.last_print_n = 0
out = tx.Activation(logits, tx.softmax)
labels = tx.dense_one_hot(loss_inputs.tensor, vocab_size)
loss = tf.reduce_mean(tx.categorical_cross_entropy(labels=labels, logits=logits.tensor))

# setup optimizer
optimizer = tx.AMSGrad(learning_rate=0.01)

model = tx.Model(run_inputs=in_layer, run_outputs=out,
                 train_inputs=in_layer, train_outputs=out,
                 train_in_loss=loss_inputs, train_out_loss=loss,
                 eval_out_score=loss, eval_in_score=loss_inputs)

print(model.feedable_train())

runner = tx.ModelRunner(model)
runner.config_optimizer(optimizer)
runner.init_vars()

# need to fix the runner interface to allow for lists to be received
data = np.array([[0, 1], [1, 0]])
targets = np.array([[2], [3]])

for i in tqdm(range(10000)):
    runner.train(model_input_data=data, loss_input_data=targets)
    if i % 1000 == 0:
        loss = runner.eval(data, targets)
        tqdm.write("loss: {}".format(loss))
def pipeline(self, args): def evaluate(args, dloader): model = self.pargs.modelinstance # Turn on evaluation mode which disables dropout. model.eval() test_loss_batch = torch.zeros(len(dloader)) ids = [] predictions = [] logprobs = [] targets = [] with torch.no_grad(): for batch_i, batch_data in enumerate( tqdm(dloader, ncols=89, desc='Test ')): loss, (sampleids, outputs, predictions_, targets_) = process(batch_data, istraining=False) if args.l1reg > 0: reg_loss = l1reg(model) loss += args.l1reg * reg_loss # keep track of some scores test_loss_batch[batch_i] = loss.item() ids.extend(sampleids.tolist()) logprobs.extend(outputs.data.tolist()) predictions.extend(predictions_.tolist()) targets.extend(targets_.tolist()) test_loss = test_loss_batch.mean() return test_loss, ids, logprobs, predictions, targets, test_loss_batch def l1reg(model): # add l1 regularization reg_loss = 0 for param_i, param in enumerate(model.parameters()): if param is None: continue reg_loss += torch.functional.F.l1_loss( param, target=torch.zeros_like(param), size_average=False) reg_loss /= (param_i + 1) return reg_loss def train(args): model = self.pargs.modelinstance # Turn on training mode which enables dropout. model.train() train_loss_batch = torch.zeros(len(self.pargs.trainloader)) sample_i = 0 report_i = 0 report_interval_begin_sample = 0 report_interval_begin_batch = 0 predictions = [] targets = [] for batch_i, batch_data in enumerate( tqdm(self.pargs.trainloader, ncols=89, desc='Train')): batch_start_time = time.time() model.zero_grad() loss, (_, outputs, batch_predictions, batch_targets) = process(batch_data, istraining=True) if args.l1reg > 0: reg_loss = l1reg(model) loss += args.l1reg * reg_loss loss.backward() self.pargs.modeloptimizingscheduler.step() # track some scores train_loss_batch[batch_i] = loss.item() predictions.extend(batch_predictions.tolist()) targets.extend(batch_targets.tolist()) sample_i += batch_targets.size(0) if ((sample_i - report_interval_begin_sample) // self.pargs.report_after_n_samples) > 0: cur_loss = train_loss_batch[report_interval_begin_batch:( batch_i + 1)].mean() cur_scores = self.getscores( targets[report_interval_begin_sample:], predictions[report_interval_begin_sample:]) cum_scores = self.getscores(targets, predictions) tqdm.write( self.message_status_interval( '*** training status ***', report_i + 1, args.status_reports, epoch, args.epochs, report_interval_begin_batch, batch_i + 1, self.pargs.ntrainbatches, args.batch_size, report_interval_begin_sample, len(targets), self.pargs.ntrainsamples, batch_start_time, cur_loss, train_loss_batch.mean(), cur_scores, cum_scores)) report_interval_begin_sample = len(targets) report_interval_begin_batch = batch_i + 1 report_i += 1 train_loss = train_loss_batch.mean() return train_loss, predictions, targets, train_loss_batch ### # Run pipeline ### best_run = utils.AttributeHolder(test_val=float('-inf'), epoch=0) args.status_reports = min(args.status_reports, self.pargs.ntrainbatches) self.pargs.report_after_n_samples = math.ceil( self.pargs.ntrainsamples / (args.status_reports + 1)) process = self.pargs.modelprocessfun for epoch in tqdm(range(1, args.epochs + 1), ncols=89, desc='Epochs'): epoch_start_time = time.time() train_loss_cum, train_predictions_cum, train_targets_cum, _ = train( args) train_scores_cum = self.getscores(train_targets_cum, train_predictions_cum) # test training set train_loss, train_sampleids, train_logprobs, train_predictions, train_targets, _ = evaluate( args, self.pargs.trainloader) train_scores = 
self.getscores(train_targets, train_predictions) # test test set if self.pargs.testset: test_loss, test_sampleids, test_logprobs, test_predictions, test_targets, _ = evaluate( args, self.pargs.testloader) test_scores = self.getscores(test_targets, test_predictions, extended=True) else: test_loss, test_sampleids, test_logprobs, test_predictions, test_targets = train_loss, train_sampleids, train_logprobs, train_predictions, train_targets test_scores = train_scores # print scores status_message = self.message_status_endepoch( '', epoch, epoch_start_time, self.pargs.modeloptimizer.getLearningRate(), train_loss, test_loss, train_scores, test_scores, best_run) tqdm.write(status_message) if best_run.test_val < test_scores[ self.pargs.best_run_test_valname]: tqdm.write( f''' > Saving model and prediction results to '{args.model:s}'...''' ) self.savemodel(args, epoch, status_message, suffix='') self.savepredictions(args, test_sampleids, test_logprobs, test_predictions, test_targets, test_scores, suffix=f'') best_run.test_valname = self.pargs.best_run_test_valname best_run.test_val = test_scores[best_run.test_valname] best_run.epoch = epoch best_run.train_scores_cum = train_scores_cum best_run.train_scores = train_scores best_run.test_scores = test_scores best_run.train_loss = train_loss best_run.test_loss = test_loss tqdm.write(' > ... Finished saving\n |') # save final model and scores tqdm.write( f''' > Saving final model and prediction results to '{args.model:s}'...''' ) self.savemodel(args, epoch, status_message, suffix='-final') self.savepredictions(args, test_sampleids, test_logprobs, test_predictions, test_targets, test_scores, suffix='-final') tqdm.write(' > ... Finished saving\n |')
val_acc /= len(dev.dataset) writer.add_scalars('loss', { 'train': train_loss / (step + 1), 'val': val_loss }, epoch * len(train) + step) model.train() train_loss /= (step + 1) train_acc /= (step + 1) tr_summary = {'loss': train_loss, 'acc': train_acc} val_summary = {'loss': val_loss, 'acc': val_acc} scheduler.step(val_summary['loss']) tqdm.write('epoch : {}, tr_loss: {:.3f}, val_loss: ' '{:.3f}, tr_acc: {:.2%}, val_acc: {:.2%}'.format( epoch + 1, tr_summary['loss'], val_summary['loss'], tr_summary['acc'], val_summary['acc'])) val_loss = val_summary['loss'] is_best = val_loss < best_val_loss if is_best: state = { 'epoch': epoch + 1, 'model_state_dict': model.state_dict(), 'opt_state_dict': optimizer.state_dict() } summary = {'train': tr_summary, 'validation': val_summary} sm.update(summary) sm.save('summary.json')
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('gt_map', help='groundtruth seqmap')
    parser.add_argument('gt_folder', help='groundtruth label folder')
    parser.add_argument('res_folder', help='results folder')
    parser.add_argument("--category", nargs="+", default=["Car"],
                        choices=["Car", "Pedestrian"])
    parser.add_argument("--flag", type=str)
    parser.add_argument("--d_min", type=float, default=0)
    parser.add_argument("--d_max", type=float, default=100, required=True)

    args = parser.parse_args()
    tqdm.write(f"args = {args}")

    out_filename = os.path.join(
        args.res_folder, f"{args.flag}_{int(args.d_min)}_{int(args.d_max)}.txt")
    tqdm.write(f"output file name = {out_filename}")

    eval_tracks(
        args.gt_map,
        args.gt_folder,
        args.res_folder,
        args.category,
        args.d_min,
        args.d_max,
        out_filename,
    )
def evaluate(gt_datas: dict, pd_datas: dict, d_min: float, d_max: float, out_file: TextIO) -> None: """Evaluate tracking output. Args: gt_datas: path to dataset pd_datas: list of path to tracker output d_min: minimum distance range d_max: maximum distance range out_file: output file object """ acc_c = mm.MOTAccumulator(auto_id=True) acc_i = mm.MOTAccumulator(auto_id=True) acc_o = mm.MOTAccumulator(auto_id=True) ID_gt_all: List[str] = [] count_all: int = 0 fr_count: int = 0 tqdm.write(f"{len(pd_datas)} {len(gt_datas)}") assert len(pd_datas) == len(gt_datas) pbar = tqdm(zip(pd_datas.items(), gt_datas.items()), total=len(gt_datas)) for (log_id_pd, pd_data), (log_id_gt, gt_data) in pbar: fr_count += len(pd_data['frames']) pbar.set_postfix_str(s=f"Logs: {log_id_gt} AccumFrames: {fr_count} | " f"PD: {len(pd_data['frames'])} " f"GT: {len(gt_data['frames'])}]") assert len(pd_data['frames']) == len(gt_data['frames']) assert log_id_pd == log_id_gt for (_, hypos), (_, annos) in \ zip(pd_data['frames'].items(), gt_data['frames'].items()): # Get entries in GT and PD gt, id_gts = create_entry(annos['annotations'], d_min, d_max) tracks, id_tracks = create_entry(hypos['annotations'], d_min, d_max) ID_gt_all.append(np.unique(id_gts).tolist()) dists_c: List[List[float]] = [] dists_i: List[List[float]] = [] dists_o: List[List[float]] = [] for _, gt_value in gt.items(): gt_track_data_c: List[float] = [] gt_track_data_i: List[float] = [] gt_track_data_o: List[float] = [] dists_c.append(gt_track_data_c) dists_i.append(gt_track_data_i) dists_o.append(gt_track_data_o) for _, track_value in tracks.items(): count_all += 1 gt_track_data_c.append( get_distance(gt_value, track_value, "centroid")) gt_track_data_i.append( get_distance(gt_value, track_value, "iou")) gt_track_data_o.append( get_distance(gt_value, track_value, "orientation")) acc_c.update(id_gts, id_tracks, dists_c) acc_i.update(id_gts, id_tracks, dists_i) acc_o.update(id_gts, id_tracks, dists_o) ID_gt_all = np.unique([item for lists in ID_gt_all for item in lists]) if count_all == 0: # fix for when all hypothesis is empty, # pymotmetric currently doesn't support this, see https://github.com/cheind/py-motmetrics/issues/49 acc_c.update(id_gts, [-1], np.ones(np.shape(id_gts)) * np.inf) acc_i.update(id_gts, [-1], np.ones(np.shape(id_gts)) * np.inf) acc_o.update(id_gts, [-1], np.ones(np.shape(id_gts)) * np.inf) tqdm.write("Computing...") summary = mh.compute( acc_c, metrics=[ "num_frames", "mota", "motp", "idf1", "mostly_tracked", "mostly_lost", "num_false_positives", "num_misses", "num_switches", "num_fragmentations", ], name="acc", ) tqdm.write(f"summary = \n{summary}") num_tracks = len(ID_gt_all) if num_tracks == 0: num_tracks = 1 num_frames = summary["num_frames"][0] mota = summary["mota"][0] * 100 motp_c = summary["motp"][0] idf1 = summary["idf1"][0] most_track = summary["mostly_tracked"][0] / num_tracks most_lost = summary["mostly_lost"][0] / num_tracks num_fp = summary["num_false_positives"][0] num_miss = summary["num_misses"][0] num_switch = summary["num_switches"][0] num_frag = summary["num_fragmentations"][0] #acc_c.events.loc[acc_c.events.Type != "RAW", # "D"] = acc_i.events.loc[acc_c.events.Type != "RAW", "D"] sum_motp_i = mh.compute(acc_i, metrics=["motp"], name="acc") tqdm.write(f"MOTP-I = \n{sum_motp_i}") motp_i = sum_motp_i["motp"][0] # acc_c.events.loc[acc_c.events.Type != "RAW", # "D"] = acc_o.events.loc[acc_c.events.Type != "RAW", "D"] sum_motp_o = mh.compute(acc_o, metrics=["motp"], name="acc") tqdm.write(f"MOTP-O = \n{sum_motp_o}") motp_o 
= sum_motp_o["motp"][0] out_string = (f"{num_frames} {mota:.2f} " f"{motp_c:.2f} {motp_o:.2f} {motp_i:.2f} " f"{idf1:.2f} {most_track:.2f} {most_lost:.2f} " f"{num_fp} {num_miss} {num_switch} {num_frag}\n") out_file.write(out_string)
def main(): checkpoint_path = args.checkpoint_path base_dir = checkpoint_path.parent.parent.parent.parent snapshot_name = checkpoint_path.parent.parent.name lmdb_dir = (base_dir / 'lmdb' / snapshot_name) with (lmdb_dir / 'meta.json').open('r') as f: meta_dict = json.load(f) mat_id_to_label = meta_dict['mat_id_to_label'] label_to_mat_id = {v: k for k, v in mat_id_to_label.items()} with (checkpoint_path.parent / 'model_params.json').open('r') as f: model_params = json.load(f) color_binner = None if 'color_hist_space' in model_params: color_binner = ColorBinner( space=model_params['color_hist_space'], shape=tuple(model_params['color_hist_shape']), sigma=tuple(model_params['color_hist_sigma']), ) print(f'Loading checkpoint from {checkpoint_path!s}') checkpoint = torch.load(checkpoint_path) if not args.out_name: # TODO: remove this ugly thing. (There's no reason to the +1 we did) out_name = str(checkpoint['epoch'] - 1) else: out_name = args.out_name model_name = checkpoint_path.parent.name out_dir = (base_dir / 'inference' / snapshot_name / model_name / out_name) model = RendNet3.from_checkpoint(checkpoint) model.train(False) model = model.cuda() yy = input(f'Will save to {out_dir!s}, continue? (y/n): ') if yy != 'y': return out_dir.mkdir(exist_ok=True, parents=True) filters = [] if args.category: filters.append(ExemplarShapePair.shape.has(category=args.category)) print(f'Loading pairs') with session_scope() as sess: pairs, count = controllers.fetch_pairs_default(sess, filters=filters) materials = sess.query(models.Material).all() mat_by_id = {m.id: m for m in materials} pairs = [ pair for pair in pairs if args.overwrite or not (Path(out_dir, f'{pair.id}.json').exists()) ] pbar = tqdm(pairs) for pair in pbar: out_path = Path(out_dir, f'{pair.id}.json') if not args.overwrite and out_path.exists(): continue if not pair.data_exists(config.PAIR_SHAPE_CLEAN_SEGMENT_MAP_NAME): tqdm.write(f'clean segment map not exists') continue pbar.set_description(f'Pair {pair.id}') exemplar = pair.exemplar shape = (224, 224) exemplar_im = pair.exemplar.load_cropped_image() exemplar_im = skimage.transform.resize(exemplar_im, shape, anti_aliasing=True, order=3, mode='constant', cval=1) # if not exemplar.data_exists(exemplar.get_image_name(shape)): # exemplar_im = resize(pair.exemplar.load_cropped_image(), # shape, order=3) # exemplar.save_data(exemplar.get_image_name(shape), exemplar_im) # else: # exemplar_im = exemplar.load_data(exemplar.get_image_name(shape)) segment_map = pair.load_data( config.PAIR_SHAPE_CLEAN_SEGMENT_MAP_NAME) - 1 substance_map = pair.exemplar.load_data(config.EXEMPLAR_SUBST_MAP_NAME) substance_map = resize(substance_map, segment_map.shape, order=0) vis.image(exemplar_im.transpose((2, 0, 1)), win='exemplar-image') result_dict = {'pair_id': pair.id, 'segments': {}} subst_id_by_seg_id = compute_segment_substances( pair, return_ids=True, segment_map=segment_map, substance_map=substance_map) for seg_id in [s for s in np.unique(segment_map) if s >= 0]: seg_mask = (segment_map == seg_id) topk_dict = compute_topk( label_to_mat_id, model, exemplar_im, seg_mask, minc_substance=SUBSTANCES[subst_id_by_seg_id[seg_id]], color_binner=color_binner, mat_by_id=mat_by_id) result_dict['segments'][str(seg_id)] = topk_dict with open(Path(out_path), 'w') as f: json.dump(result_dict, f, indent=2)
args = parser.parse_args()

# Reference
# https://towardsdatascience.com/the-easiest-way-to-download-youtube-videos-using-python-2640958318ab
file = open(args.urls, 'r')
lines = file.readlines()
done = []
totalLengthSeconds = 0
for i, line in enumerate(tqdm(lines, desc='Downloading', unit='video')):
    line = line.strip()
    sections = line.split("#")
    if len(sections) > 1:
        line = sections[0].strip()
    if len(line) <= 0:
        continue
    tqdm.write(line)
    if line not in done:
        name = "YouTube"
        while name == "YouTube":
            try:
                video = YouTube(line)  # latest document as of Feb 2 2018
                name = video.title
                if name == "YouTube":
                    tqdm.write("Bad name")
                    continue
                tqdm.write('Video: "' + name + '"')
                if len(
                        video.streams.filter(file_extension="mp4").filter(
                            res=str(args.resolution) + 'p',
                            fps=args.fps)) != 1:
                    for s in video.streams.filter(
def emit(self, record):
    msg = self.format(record)
    tqdm.write(msg)
            'labels': batch[2]
        }
        # Inputs to the model:
        # https://huggingface.co/transformers/model_doc/bert.html#bertforsequenceclassification
        outputs = model(**inputs)  # Returns a tuple of loss and the logits
        loss = outputs[0]
        loss_train_total += loss.item()
        loss.backward()

        # clip_grad_norm is deprecated; the in-place variant is the supported API
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        scheduler.step()

        progress_bar.set_postfix(
            {'training_loss': '{}'.format(loss.item() / len(batch))})

    tqdm.write('\nEpoch {}'.format(epoch))

    loss_train_avg = loss_train_total / len(dataloader_train)
    tqdm.write('Training Loss: {}'.format(loss_train_avg))

    val_loss, predictions, true_vals = evaluate(dataloader_val)
    val_f1 = f1_score_func(predictions, true_vals)
    tqdm.write('Validation Loss: {}'.format(val_loss))
    tqdm.write('F1 score (weighted): {}'.format(val_f1))

    if val_f1 > best_f1:
        best_f1 = val_f1
        print('Saving the model...')
        torch.save(model.state_dict(), './Models/best_model.model')
def download(directory, username, password, size, recent, \ until_found, download_videos, force_size, auto_delete, \ smtp_username, smtp_password, notification_email): """Download all iCloud photos to a local directory""" if not notification_email: notification_email = smtp_username icloud = authenticate(username, password, smtp_username, smtp_password, notification_email) directory = os.path.normpath(directory) print("Looking up all photos...") photos = icloud.photos.all photos_count = len(photos) # Optional: Only download the x most recent photos. if recent is not None: photos_count = recent photos = (p for i, p in enumerate(photos) if i < recent) kwargs = {'total': photos_count} if until_found is not None: del kwargs['total'] photos_count = '???' # ensure photos iterator doesn't have a known length photos = (p for p in photos) if download_videos: print("Downloading %s %s photos and videos to %s/ ..." % (photos_count, size, directory)) else: print("Downloading %s %s photos to %s/ ..." % (photos_count, size, directory)) consecutive_files_found = 0 progress_bar = tqdm(photos, **kwargs) for photo in progress_bar: for _ in range(MAX_RETRIES): try: if not download_videos \ and not photo.filename.lower().endswith(('.png', '.jpg', '.jpeg')): progress_bar.set_description( "Skipping %s, only downloading photos." % photo.filename) continue created_date = photo.created date_path = '{:%Y/%m/%d}'.format(created_date) download_dir = os.path.join(directory, date_path) if not os.path.exists(download_dir): os.makedirs(download_dir) download_path = local_download_path(photo, size, download_dir) if os.path.isfile(download_path): if until_found is not None: consecutive_files_found += 1 progress_bar.set_description( "%s already exists." % truncate_middle(download_path, 96)) break download_photo(photo, download_path, size, force_size, download_dir, progress_bar) if until_found is not None: consecutive_files_found = 0 break except (requests.exceptions.ConnectionError, socket.timeout): tqdm.write('Connection failed, retrying after %d seconds...' % WAIT_SECONDS) time.sleep(WAIT_SECONDS) else: tqdm.write("Could not process %s! Maybe try again later." % photo.filename) if until_found is not None and consecutive_files_found >= until_found: tqdm.write( 'Found %d consecutive previusly downloaded photos. Exiting' % until_found) progress_bar.close() break print("All photos have been downloaded!") if auto_delete: print("Deleting any files found in 'Recently Deleted'...") recently_deleted = icloud.photos.albums['Recently Deleted'] for media in recently_deleted: created_date = media.created date_path = '{:%Y/%m/%d}'.format(created_date) download_dir = os.path.join(directory, date_path) filename = filename_with_size(media, size) path = os.path.join(download_dir, filename) if os.path.exists(path): print("Deleting %s!" % path) os.remove(path)
def dataTesting(self, dataSourceA, dataSourceB):
    data = newTesting(dataSourceA, dataSourceB)
    for choice in (dataSourceA[1], dataSourceB[1]):
        num_data = data.DataNum[choice]  # number of samples available
        ran_num = random.randint(0, num_data - 1)  # pick a random starting index
        overall_p = 0
        overall_n = 0
        overall_tp = 0
        overall_tn = 0
        start = time.time()
        # data_count = 200
        pbar = tqdm(range(num_data))
        for i in pbar:
            stuff = data.GetData(
                (ran_num + i) % num_data,
                dataType=choice,
                feature_type=self.featureType)  # take samples consecutively, starting from the random index
            if self.featureType in ('both', 'Both', 'BOTH'):
                dataSpec, dataMfcc, data_labels = stuff
                data_input = [dataSpec, dataMfcc]
            else:
                data_input, data_labels = stuff
                data_input = data_input[np.newaxis, :]
            data_pre = self.model.predict_on_batch(data_input)
            if self.voting == False:
                predictions = np.argmax(data_pre[0], axis=0)
            else:
                predictions = sum([
                    np.argmax(element[0], axis=0) for element in data_pre
                ])
                predictions = 1 if predictions >= 2 else 0
            tp, fp, tn, fn = Comapare2(predictions, data_labels[0])  # compute metrics
            overall_p += tp + fn
            overall_n += tn + fp
            overall_tp += tp
            overall_tn += tn

        if overall_p != 0:
            sensitivity = overall_tp / overall_p * 100
            sensitivity = round(sensitivity, 2)
        else:
            sensitivity = 'None'
        if overall_n != 0:
            specificity = overall_tn / overall_n * 100
            specificity = round(specificity, 2)
        else:
            specificity = 'None'
        if sensitivity != 'None' and specificity != 'None':
            score = (sensitivity + specificity) / 2
            score = round(score, 2)
        else:
            score = 'None'
        accuracy = (overall_tp + overall_tn) / (overall_p + overall_n) * 100
        accuracy = round(accuracy, 2)
        end = time.time()
        dtime = round(end - start, 2)
        strg = ('*[Generalization test results] segment type [{0}] '
                'sensitivity: {1}%, specificity: {2}%, score: {3}, '
                'accuracy: {4}%, time: {5}s.').format(
                    choice, sensitivity, specificity, score, accuracy, dtime)
        tqdm.write(strg)
        pbar.close()
def write_above_single_progress_bar(self, seq_no, line):
    tqdm.write(line)
def train(self): try: best_accuracy = 0 epoch_log_file = os.path.join(self._result_log_base_path, "epoch_result.log") curr_learning = self._config["learning_rate"] minimum_learning_rate = self._config["minimum_learning_rate"] last_10_accuracy = 0.0 for epoch in tqdm(range(self._epoches)): self._train_data_iterator.shuffle() losses = list() total = 0 train_correct = 0 file = os.path.join( self._result_log_base_path, "test_" + self._curr_time + "_" + str(epoch) + ".log") for i in tqdm(range( self._train_data_iterator.batch_per_epoch)): batch = self._train_data_iterator.get_batch() batch.learning_rate = curr_learning tag_predictions, segment_length_predictions, loss, optimizer, feed_dict = self._train_model.train( batch) tag_predictions, segment_length_predictions, loss, optimizer = self._session.run( (tag_predictions, segment_length_predictions, loss, optimizer), feed_dict=feed_dict) total += batch.size train_correct += self._check_predictions( tag_predictions=tag_predictions, segment_length_predictions=segment_length_predictions, ground_truth=batch.ground_truth, ground_truth_segment_length=batch. ground_truth_segment_length, ground_truth_segmentation_length=batch. ground_truth_segmentation_length, question_length=batch.questions_length) losses.append(loss) train_acc = train_correct / total self._dev_data_iterator.shuffle() dev_accuracy = self.test(self._dev_data_iterator, is_log=False) average_loss = np.average(np.array(losses)) tqdm.write( "epoch: %d, loss: %f, train_acc: %f, dev_acc: %f, learning_rate: %f" % (epoch, average_loss, train_acc, dev_accuracy, curr_learning)) if dev_accuracy > best_accuracy: best_accuracy = dev_accuracy self._saver.save(self._session, self._best_checkpoint_file) # Learning rate decay: if epoch > 0 and epoch % 20 == 0: if dev_accuracy <= last_10_accuracy and curr_learning > minimum_learning_rate: curr_learning /= 2 last_10_accuracy = dev_accuracy self._epoch_log(file=epoch_log_file, num_epoch=epoch, train_accuracy=train_acc, dev_accuracy=dev_accuracy, average_loss=average_loss) except (KeyboardInterrupt, SystemExit): # If the user press Ctrl+C... # Save the model # tqdm.write("===============================") # tqdm.write(str(self._batch.word_character_matrix)) # tqdm.write("*******************************") # tqdm.write(str(self._batch.word_character_length)) # tqdm.write("===============================") self._saver.save(self._session, self._checkpoint_file) except ValueError as e: print(e)
def train( net, optimizer, criterion, data_loader, epoch, scheduler=None, display_iter=100, device=torch.device("cpu"), display=None, val_loader=None, supervision="full", ): """ Training loop to optimize a network for several epochs and a specified loss Args: net: a PyTorch model optimizer: a PyTorch optimizer data_loader: a PyTorch dataset loader epoch: int specifying the number of training epochs criterion: a PyTorch-compatible loss function, e.g. nn.CrossEntropyLoss device (optional): torch device to use (defaults to CPU) display_iter (optional): number of iterations before refreshing the display (False/None to switch off). scheduler (optional): PyTorch scheduler val_loader (optional): validation dataset supervision (optional): 'full' or 'semi' """ if criterion is None: raise Exception("Missing criterion. You must specify a loss function.") net.to(device) save_epoch = epoch // 20 if epoch > 20 else 1 losses = np.zeros(1000000) mean_losses = np.zeros(100000000) iter_ = 1 loss_win, val_win = None, None val_accuracies = [] for e in tqdm(range(1, epoch + 1), desc="Training the network"): # Set the network to training mode net.train() avg_loss = 0.0 # Run the training loop for one epoch for batch_idx, (data, target) in tqdm(enumerate(data_loader), total=len(data_loader)): # Load the data into the GPU if required data, target = data.to(device), target.to(device) optimizer.zero_grad() if supervision == "full": output = net(data) loss = criterion(output, target) elif supervision == "semi": outs = net(data) output, rec = outs loss = criterion[0]( output, target) + net.aux_loss_weight * criterion[1](rec, data) else: raise ValueError( 'supervision mode "{}" is unknown.'.format(supervision)) loss.backward() optimizer.step() avg_loss += loss.item() losses[iter_] = loss.item() mean_losses[iter_] = np.mean(losses[max(0, iter_ - 100):iter_ + 1]) if display_iter and iter_ % display_iter == 0: string = "Train (epoch {}/{}) [{}/{} ({:.0f}%)]\tLoss: {:.6f}" string = string.format( e, epoch, batch_idx * len(data), len(data) * len(data_loader), 100.0 * batch_idx / len(data_loader), mean_losses[iter_], ) update = None if loss_win is None else "append" loss_win = display.line( X=np.arange(iter_ - display_iter, iter_), Y=mean_losses[iter_ - display_iter:iter_], win=loss_win, update=update, opts={ "title": "Training loss", "xlabel": "Iterations", "ylabel": "Loss", }, ) tqdm.write(string) if len(val_accuracies) > 0: val_win = display.line( Y=np.array(val_accuracies), X=np.arange(len(val_accuracies)), win=val_win, opts={ "title": "Validation accuracy", "xlabel": "Epochs", "ylabel": "Accuracy", }, ) iter_ += 1 del (data, target, loss, output) # Update the scheduler avg_loss /= len(data_loader) if val_loader is not None: val_acc = val(net, val_loader, device=device, supervision=supervision) val_accuracies.append(val_acc) metric = -val_acc else: metric = avg_loss if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau): scheduler.step(metric) elif scheduler is not None: scheduler.step() # Save the weights if e % save_epoch == 0: save_model( net, camel_to_snake(str(net.__class__.__name__)), data_loader.dataset.name, epoch=e, metric=abs(metric), )
loss = torch.nn.BCELoss()

loss_list = []
for e in tqdm(range(num_epochs)):
    total_loss = 0.0
    for batch in data_loader:
        optimizer.zero_grad()
        model_input = batch[0]
        label = batch[1].view(-1, 1)
        model_output = model(model_input)
        l = loss(model_output, label)
        l.backward()
        optimizer.step()
        total_loss += l.detach()  # detach so each batch's autograd graph can be freed
    tqdm.write("loss: " + str(total_loss.item()))
    loss_list.append(total_loss.item())

if args.save_model:
    from pathlib import Path
    print("Saving the trained model")
    this_filepath = Path(os.path.abspath(__file__))
    this_dirpath = this_filepath.parent
    model_path = os.path.join(this_dirpath, "model")
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    model_path = os.path.join(model_path, "fnn.log")
    torch.save(model, model_path)

plt.plot(loss_list)
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

to_process = []

with open("errors.txt", "r") as infile:
    to_process = [int(l.strip()) for l in infile.read().splitlines()]

for i in tqdm(to_process):  # latest document as of Feb 2 2018
    # try:
    tqdm.write("Processing document #" + str(i))
    document = requests.get(
        "http://www.presidency.ucsb.edu/ws/print.php?pid=" + str(i)).text
    soup = BeautifulSoup(document, 'html.parser')
    title = soup.title.contents[0].replace("\xa0", " ").replace("/", ":")
    if len(title) > 200:
        title = title[:97] + "..." + title[-100:]
    content = soup.find('span', {'class': 'style9'}).text
    with open(title + "." + str(i) + ".txt", "w") as outfile:
        outfile.write(content)
    # except Exception as e:
    #     print(e)
    #     with open("errors2.txt", "a") as outfile:
    #         outfile.write(str(i) + "\n")
    #     continue
def train_loop(self, epoch, train_loader, optimizer, args): """ Run a train loop. :param epoch: the epoch # (used for logging) :param train_loader: a torch.utils.data.DataLoader generated from data.datamgr.SetDataManager :param optimizer: a torch.optim.Optimzer :param args: other args passed to the script :returns: a dictionary of metrics: train_acc, train_loss, cls_loss, and lang_loss if applicable """ avg_loss = 0 avg_cls_loss = 0 avg_lang_loss = 0 acc_all = [] for i, (x, target, (lang, lang_length, lang_mask)) in enumerate(train_loader): self.n_query = x.size(1) - self.n_support optimizer.zero_grad() if self.lsl or self.l3: # Load language # Trim padding to max length in batch max_lang_length = lang_length.max() lang = lang[:, :, :max_lang_length] lang_mask = lang_mask[:, :, :max_lang_length] lang = lang.cuda() lang_length = lang_length.cuda() lang_mask = lang_mask.cuda() # ==== CLASSIFICATION LOSS ===- if self.l3: cls_loss, z_support, z_query = self.set_forward_loss_l3( x, (lang, lang_length), return_z=True) else: cls_loss, z_support, z_query = self.set_forward_loss( x, return_z=True) loss = cls_loss # ==== LANGUAGE LOSS ==== if self.lsl or self.l3: lang_loss = self.set_lang_loss(z_support, z_query, lang, lang_length, lang_mask) lang_loss = args.lang_lambda * lang_loss loss = loss + lang_loss avg_lang_loss = avg_lang_loss + lang_loss.item() loss.backward() optimizer.step() avg_loss = avg_loss + loss.item() avg_cls_loss = avg_cls_loss + cls_loss.item() if self.l3: # Stick to just 1 inference at train time since evaluating # accuracy is expensive correct_this, count_this = self.correct_l3(x, n_infer=1) else: correct_this, count_this = self.correct(x) acc_all.append(correct_this / count_this * 100) metrics = { "train_acc": None, "train_loss": None, "cls_loss": None, "lang_loss": None, } metrics["train_loss"] = avg_loss / (i + 1) metrics["cls_loss"] = avg_cls_loss / (i + 1) tqdm.write("Epoch {:d} | Loss {:f}".format(epoch, metrics["train_loss"])) if self.lsl: metrics["lang_loss"] = avg_lang_loss / (i + 1) tqdm.write("Epoch {:d} | Lang Loss {:f}".format( epoch, metrics["lang_loss"])) metrics["train_acc"] = np.mean(acc_all) tqdm.write("Epoch {:d} | Train Acc {:.2f}".format( epoch, metrics["train_acc"])) return metrics
Y = data['fraud_ind']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
print(
    f'X_train.shape={X_train.shape}, X_test.shape={X_test.shape}, Y_train.shape={Y_train.shape}, Y_test.shape={Y_test.shape}'
)

models = []
models.append(('LR', LogisticRegression()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('SVM', LinearSVC()))
models.append(('RF', RandomForestClassifier()))
models.append(('LOF', LocalOutlierFactor()))

results = []
names = []
for name, model in tqdm(models):
    tqdm.write(f'Training {name}...')
    # random_state only takes effect (and is only accepted by recent scikit-learn) with shuffle=True
    kfold = KFold(n_splits=10, shuffle=True, random_state=42)
    cv_f1_scores = cross_val_score(model, X_train, Y_train, cv=kfold, scoring='f1')
    results.append(cv_f1_scores)
    names.append(name)
    tqdm.write(f'{name}: {cv_f1_scores.mean()} ({cv_f1_scores.std()})')
def __init__(self, cnn, archive, name, org, subnet, date, GamitConfig, stations, ties=(), centroid=()): """ The GAMIT session object creates all the directory structure and configuration files according to the parameters set in GamitConfig. Two stations list are passed and merged to create the session :param cnn: connection to database object :param archive: archive object to find rinex files in archive structure :param name: name of the project/network :param org: name of the organization :param subnet: subnet number (may be None, in which case the directory name will not show ORGXX :param date: date that is being processed :param GamitConfig: configuration to run gamit :param stations: list of stations to be processed :param ties: tie stations as obtained by pyNetwork """ self.NetName = name self.org = org self.subnet = subnet if subnet is not None: self.DirName = '%s.%s%02i' % (self.NetName, self.org, self.subnet) else: self.DirName = self.NetName self.date = date self.GamitOpts = GamitConfig.gamitopt # type: pyGamitConfig.GamitConfiguration().gamitopt self.Config = GamitConfig # type: pyGamitConfig.GamitConfiguration self.frame = None self.params = None # to store the polyhedron read from the final SINEX self.polyhedron = None self.VarianceFactor = None # gamit task will be filled with the GamitTask object self.GamitTask = None self.solution_base = self.GamitOpts['solutions_dir'].rstrip('/') # tie station dictionary (to build KMLs, do not change) self.tie_dict = [{'name' : stationID(stn), 'coords': [(stn.lon, stn.lat)]} for stn in ties] # station dictionary (to build KMLs, do not change) self.stations_dict = [{'name' : stationID(stn), 'coords' : [(stn.lon, stn.lat)]} for stn in stations] # make StationInstances station_instances = [] for stn in stations: try: station_instances += [StationInstance(cnn, archive, stn, date, GamitConfig)] except pyRinexName.RinexNameException: tqdm.write(' -- WARNING (station instance): station %s on day %s appears to have a badly formed RINEX ' 'filename. Please check the archive and make sure all filenames follow the RINEX 2/3 ' 'convention. Station has been excluded from the GAMIT session.' % (stationID(stn), date.yyyyddd())) # do the same with ties for stn in ties: try: station_instances += [StationInstance(cnn, archive, stn, date, GamitConfig, is_tie=True)] except pyRinexName.RinexNameException: tqdm.write(' -- WARNING (tie instance): station %s on day %s appears to have a badly formed RINEX ' 'filename. Please check the archive and make sure all filenames follow the RINEX 2/3 ' 'convention. Station has been excluded from the GAMIT session.' % (stationID(stn), date.yyyyddd())) self.StationInstances = station_instances # create working dirs for this session last_path = '/%s/%s/%s' % (date.yyyy(), date.ddd(), self.DirName) self.solution_pwd = self.solution_base + last_path # the remote pwd is the directory where the processing will be performed self.remote_pwd = 'production/gamit' + last_path row_key = {'Year' : date.year, 'DOY' : date.doy, 'Project' : self.NetName, 'subnet' : 0 if subnet is None else subnet} try: # attempt to retrieve the session from the database. 
If error is raised, then the session has to be # reprocessed cnn.get('gamit_stats', row_key.copy()) self.ready = True except: self.ready = False try: # since ready == False, then try to delete record in subnets cnn.delete('gamit_subnets', row_key.copy()) except: pass # a list to report missing data for this session self.missing_data = [] if not os.path.exists(self.solution_pwd): # if the path does not exist, create it! os.makedirs(self.solution_pwd) # force ready = False, no matter what the database says self.ready = False try: cnn.delete('gamit_stats', row_key.copy()) cnn.delete('gamit_subnets', row_key.copy()) except: pass elif os.path.exists(self.solution_pwd) and not self.ready: # if the solution directory exists but the session is not ready, kill the directory rmtree(self.solution_pwd) if not self.ready: # insert the subnet in the database cnn.insert('gamit_subnets', {**row_key, 'stations' : '{%s}' % ','.join(stationID(s) for s in stations + list(ties)), 'alias' : '{%s}' % ','.join(s.StationAlias for s in stations + list(ties)), 'ties' : '{%s}' % ','.join(s['name'] for s in self.tie_dict), 'centroid' : '{%s}' % ','.join('%.1f' % c for c in centroid)}) self.pwd_igs = os.path.join(self.solution_pwd, 'igs') self.pwd_brdc = os.path.join(self.solution_pwd, 'brdc') self.pwd_rinex = os.path.join(self.solution_pwd, 'rinex') self.pwd_tables = os.path.join(self.solution_pwd, 'tables') self.pwd_glbf = os.path.join(self.solution_pwd, 'glbf') self.pwd_proc = os.path.join(self.solution_pwd, date.ddd()) if not self.ready: # only create folders, etc if it was determined the solution isn't ready if not os.path.exists(self.pwd_igs): os.makedirs(self.pwd_igs) if not os.path.exists(self.pwd_brdc): os.makedirs(self.pwd_brdc) if os.path.exists(self.pwd_rinex): # delete any possible rinex files from a truncated session rmtree(self.pwd_rinex) os.makedirs(self.pwd_rinex) if not os.path.exists(self.pwd_tables): os.makedirs(self.pwd_tables) # check that the processing directory doesn't exist. # if it does, remove (it has already been determined that the solution is not ready if os.path.exists(self.pwd_glbf): rmtree(self.pwd_glbf) if os.path.exists(self.pwd_proc): rmtree(self.pwd_proc) self.generate_kml()
def main(data_path, results_file, config): #################################################################################### # Previous operations #################################################################################### ### layers = config['layers'] ### L = len(layers) conv_kernels = config['conv_kernels'] conv_filters = config['conv_filters'] num_classes = config['num_classes'] tf.reset_default_graph( ) # Clear the tensorflow graph (free reserved memory) #################################################################################### # Inputs setup #################################################################################### max_sentence_len = config['max_sentence_len'] # feedforward_inputs (FFI): inputs for the feedforward network (i.e. the encoder). # Should contain the labeled training data (padded to max_sentence_len). feedforward_inputs = tf.placeholder(tf.int32, shape=(None, max_sentence_len), name="FFI") # autoencoder_inputs (AEI): inputs for the autoencoder (encoder + decoder). # Should contain the unlabeled training data (also padded to max_sentence_len). autoencoder_inputs = tf.placeholder(tf.int32, shape=(None, max_sentence_len), name="AEI") outputs = tf.placeholder(tf.float32) # target training = tf.placeholder(tf.bool) # training or evaluation # Not quite sure what is this for FFI = tf.reshape(feedforward_inputs, [-1] + [max_sentence_len]) AEI = tf.reshape(autoencoder_inputs, [-1] + [max_sentence_len]) #################################################################################### # Embeddings weights #################################################################################### embeddings_size = config['embeddings_size'] vocab_size = config['vocab_size'] embeddings_weights = tf.get_variable("embeddings", (vocab_size, embeddings_size), trainable=False) # initializer=tf.random_normal_initializer()) place = tf.placeholder(tf.float32, shape=(vocab_size, embeddings_size)) set_embeddings_weights = embeddings_weights.assign(place) FFI_embeddings = tf.expand_dims(tf.nn.embedding_lookup( embeddings_weights, FFI), axis=-1, name="FFI_embeddings") AEI_embeddings = tf.expand_dims(tf.nn.embedding_lookup( embeddings_weights, AEI), axis=-1, name="AEI_embeddings") #################################################################################### # Batch normalization setup & functions #################################################################################### # to calculate the moving averages of mean and variance # ewma = tf.train.ExponentialMovingAverage(decay=0.99) # # this list stores the updates to be made to average mean and variance # bn_assigns = [] # def update_batch_normalization(batch, output_name="bn", scope_name="BN"): # dim = len(batch.get_shape().as_list()) # mean, var = tf.nn.moments(batch, axes=list(range(0, dim - 1))) # # Function to be used during the learning phase. # # Normalize the batch and update running mean and variance. 
# with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE): # running_mean = tf.get_variable("running_mean", # mean.shape, # initializer=tf.constant_initializer(0)) # running_var = tf.get_variable("running_var", # mean.shape, # initializer=tf.constant_initializer(1)) # assign_mean = running_mean.assign(mean) # assign_var = running_var.assign(var) # bn_assigns.append(ewma.apply([running_mean, running_var])) # with tf.control_dependencies([assign_mean, assign_var]): # z = (batch - mean) / tf.sqrt(var + 1e-10) # return tf.identity(z, name=output_name) def batch_normalization(batch, output_name="bn"): dim = len(batch.get_shape().as_list()) mean, var = tf.nn.moments(batch, axes=list(range(0, dim - 1))) # if mean is None or var is None: # dim = len(batch.get_shape().as_list()) # mean, var = tf.nn.moments(batch, axes=list(range(0, dim - 1))) z = (batch - mean) / tf.sqrt(var + tf.constant(1e-10)) return tf.identity(z, name=output_name) #################################################################################### # Encoder #################################################################################### def encoder_layer(z_pre, noise_std, activation): # Run the layer # z_pre = run_layer(h, layer_spec, output_name="z_pre") # Compute mean and variance of z_pre (to be used in the decoder) dim = len(z_pre.get_shape().as_list()) mean, var = tf.nn.moments(z_pre, axes=list(range(0, dim - 1))) # Create a variable to store the values for latter retrieving them _ = tf.identity(mean, name="mean"), tf.identity(var, name="var") # # Batch normalization # def training_batch_norm(): # if update_BN: # z = update_batch_normalization(z_pre) # else: # z = batch_normalization(z_pre) # return z # def eval_batch_norm(): # with tf.variable_scope("BN", reuse=tf.AUTO_REUSE): # mean = ewma.average(tf.get_variable("running_mean", # shape=z_pre.shape[-1])) # var = ewma.average(tf.get_variable("running_var", # shape=z_pre.shape[-1])) # z = batch_normalization(z_pre, mean, var) # return z # Perform batch norm depending to the phase (training or testing) # z = tf.cond(training, training_batch_norm, eval_batch_norm) z = batch_normalization(z_pre) z += tf.random_normal(tf.shape(z)) * noise_std z = tf.identity(z, name="z") # Center and scale plus activation size = z.get_shape().as_list()[-1] beta = tf.get_variable("beta", [size], initializer=tf.constant_initializer(0)) gamma = tf.get_variable("gamma", [size], initializer=tf.constant_initializer(1)) h = activation(z * gamma + beta) return tf.identity(h, name="h") def encoder(x, noise_std): # Perform encoding for each layer x += tf.random_normal(tf.shape(x)) * noise_std x = tf.identity(x, "h0") # Build the "wide" convolutional layer for each conv_kernel # This is the "first" layer conv_features = [] weight_variables = [] for i, ksize in enumerate(conv_kernels, start=1): with tf.variable_scope("encoder_bloc_" + str(i), reuse=tf.AUTO_REUSE): W = tf.get_variable( "W", (ksize, embeddings_size, 1, conv_filters), initializer=tf.truncated_normal_initializer()) weight_variables.append(W) z_pre = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="VALID", name="z_pre") h = encoder_layer( z_pre, noise_std, # update_BN=update_BN, activation=tf.nn.relu) h = tf.nn.max_pool( h, ksize=[1, max_sentence_len - ksize + 1, 1, 1], strides=[1, 1, 1, 1], padding="VALID", name="global_max_pool") conv_features.append(h) # Build the features layer ("second" layer) total_kernels = len(conv_kernels) total_conv_features = total_kernels * conv_filters with tf.variable_scope("encoder_bloc_" + 
str(total_kernels + 1), reuse=tf.AUTO_REUSE): h = tf.concat(conv_features, 3) h = tf.reshape(h, (-1, total_conv_features), name="h") # Build the features to classes layer ("last" layer) with tf.variable_scope("encoder_bloc_" + str(total_kernels + 2), reuse=tf.AUTO_REUSE): W = tf.get_variable("W", (total_conv_features, num_classes), initializer=tf.random_normal_initializer()) weight_variables.append(W) print('h shape', h.shape) print('W shape', W.shape) z_pre = tf.matmul(h, W, name="z_pre") h = encoder_layer( z_pre, noise_std, # update_BN=update_BN, activation=tf.nn.softmax) y = tf.identity(h, name="y") return y, weight_variables noise_std = config['noise_std'] with tf.name_scope("FF_clean"): # output of the clean encoder. Used for prediction FF_y, weight_variables = encoder(FFI_embeddings, 0) # , update_BN=False) with tf.name_scope("FF_corrupted"): # output of the corrupted encoder. Used for training. FF_y_corr, _ = encoder(FFI_embeddings, noise_std) # , update_BN=False) with tf.name_scope("AE_clean"): # corrupted encoding of unlabeled instances AE_y, _ = encoder(AEI_embeddings, 0) # , update_BN=True) with tf.name_scope("AE_corrupted"): # corrupted encoding of unlabeled instances AE_y_corr, _ = encoder(AEI_embeddings, noise_std) # , update_BN=False) l2_reg = tf.constant(0.0) for we_var in weight_variables: l2_reg += tf.nn.l2_loss(we_var) #################################################################################### # Decoder #################################################################################### def g_gauss(z_c, u, output_name="z_est", scope_name="denoising_func"): # gaussian denoising function proposed in the original paper size = u.get_shape().as_list()[-1] def wi(inits, name): return tf.Variable(inits * tf.ones([size]), name=name) with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE): a1 = wi(0., 'a1') a2 = wi(1., 'a2') a3 = wi(0., 'a3') a4 = wi(0., 'a4') a5 = wi(0., 'a5') a6 = wi(0., 'a6') a7 = wi(1., 'a7') a8 = wi(0., 'a8') a9 = wi(0., 'a9') a10 = wi(0., 'a10') mu = a1 * tf.sigmoid(a2 * u + a3) + a4 * u + a5 v = a6 * tf.sigmoid(a7 * u + a8) + a9 * u + a10 z_est = (z_c - mu) * v + mu return tf.identity(z_est, name=output_name) def get_tensor(input_name, num_encoder_bloc, name_tensor): return tf.get_default_graph().\ get_tensor_by_name(input_name + "/encoder_bloc_" + str(num_encoder_bloc) + "/" + name_tensor + ":0") denoising_cost = config['denoising_cost'] d_cost = [] u = batch_normalization(AE_y_corr, output_name="u_L") # Build first decoder layer (corresponding to the dense layer) total_kernels = len(conv_kernels) total_conv_features = total_kernels * conv_filters with tf.variable_scope("decoder_bloc_" + str(total_kernels + 2), reuse=tf.AUTO_REUSE): z_corr = get_tensor("AE_corrupted", total_kernels + 2, "z") z = get_tensor("AE_clean", total_kernels + 2, "z") mean = get_tensor("AE_clean", total_kernels + 2, "mean") var = get_tensor("AE_clean", total_kernels + 2, "var") # Performs the decoding operations of a corresponding encoder bloc # Denoising z_est = g_gauss(z_corr, u) z_est_BN = (z_est - mean) / tf.sqrt(var + tf.constant(1e-10)) z_est_BN = tf.identity(z_est_BN, name="z_est_BN") # run decoder layer V = tf.get_variable("V", (num_classes, total_conv_features), initializer=tf.random_normal_initializer()) l2_reg += tf.nn.l2_loss(V) u = tf.matmul(z_est, V) u = batch_normalization(u, output_name="u") d_cost.append( (tf.reduce_mean(tf.square(z_est_BN - z))) * denoising_cost[2]) # Build second decoder layer (corresponding to the concatenation+flat layer) with 
tf.variable_scope("decoder_bloc_" + str(total_kernels + 1), reuse=tf.AUTO_REUSE): u = tf.reshape(u, (-1, 1, 1, total_conv_features)) deconv_features = tf.split(u, total_kernels, axis=3) # Build the final "wide convolutional" layer deconv_layers = [] for i, gmp_layer in enumerate(deconv_features, start=1): ksize = conv_kernels[i - 1] with tf.variable_scope("decoder_bloc_" + str(i), reuse=tf.AUTO_REUSE): u = tf.keras.layers.UpSampling2D(size=(max_sentence_len - ksize + 1, 1))(gmp_layer) z_corr = get_tensor("AE_corrupted", i, "z") z = get_tensor("AE_clean", i, "z") mean = get_tensor("AE_clean", i, "mean") var = get_tensor("AE_clean", i, "var") z_est = g_gauss(z_corr, u) z_est_BN = (z_est - mean) / tf.sqrt(var + tf.constant(1e-10)) z_est_BN = tf.identity(z_est_BN, name="z_est_BN") # run deconvolutional (transposed convolution) layer V = tf.get_variable("V", (ksize, embeddings_size, 1, conv_filters), initializer=tf.truncated_normal_initializer()) l2_reg += tf.nn.l2_loss(V) u = tf.nn.conv2d_transpose(z_est, V, output_shape=tf.shape(AEI_embeddings), strides=[1, 1, 1, 1], padding='VALID') u = batch_normalization(u, output_name="u") deconv_layers.append(u) d_cost.append( (tf.reduce_mean(tf.square(z_est_BN - z))) * denoising_cost[1]) # last decoding step u = tf.concat(deconv_layers, 2) with tf.variable_scope("decoder_bloc_0", reuse=tf.AUTO_REUSE): z_corr = tf.get_default_graph().get_tensor_by_name("AE_corrupted/h0:0") z_corr = tf.concat([z_corr] * total_kernels, 2) z = tf.get_default_graph().get_tensor_by_name("AE_clean/h0:0") z = tf.concat([z] * total_kernels, 2) z_est = g_gauss(z_corr, u) d_cost.append( (tf.reduce_mean(tf.square(z_est - z))) * denoising_cost[0]) #################################################################################### # Loss, accuracy and optimization #################################################################################### u_cost = tf.add_n(d_cost) # reconstruction cost corr_pred_cost = -tf.reduce_mean( tf.reduce_sum(outputs * tf.log(FF_y_corr), 1)) # supervised cost clean_pred_cost = -tf.reduce_mean(tf.reduce_sum(outputs * tf.log(FF_y), 1)) loss = corr_pred_cost + u_cost * config['u_cost_weight'] + config.get( "lambda", 0.0) * l2_reg # total cost predictions = tf.argmax(FF_y, 1) correct_prediction = tf.equal(predictions, tf.argmax(outputs, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float")) # Optimization setting starter_learning_rate = config['starter_learning_rate'] learning_rate = tf.Variable(starter_learning_rate, trainable=False) train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss) # add the updates of batch normalization statistics to train_step # bn_updates = tf.group(*bn_assigns) # with tf.control_dependencies([train_step]): # train_step = tf.group(bn_updates) n = np.sum( [np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()]) print("There is a total of %d trainable parameters" % n, file=sys.stderr) #################################################################################### # Training #################################################################################### print("=== Loading Data ===", file=sys.stderr) data, w2v_model = input_data_cnn_wide_ladder.read_data_sets( data_path, n_classes=config['num_classes'], n_labeled=config['num_labeled'], maxlen=max_sentence_len) num_examples = data.train.unlabeled_ds.instances.shape[0] batch_size = config['batch_size'] num_epochs = config['num_epochs'] num_iter = (num_examples // batch_size) * num_epochs # number of loop iterations print("=== Starting 
Session ===", file=sys.stderr) dev_config = tf.ConfigProto() # Don't pre-allocate memory; allocate as-needed dev_config.gpu_options.allow_growth = True # Only allow a total of half the GPU memory to be allocated dev_config.gpu_options.per_process_gpu_memory_fraction = 1 #0.5 sess = tf.Session(config=dev_config) if not os.path.exists(results_file): results_log = open(results_file, "w") print("experiment,split,epoch,accuracy,tloss,lloss,true,pred", file=results_log) else: results_log = open(results_file, "a") init = tf.global_variables_initializer() sess.run(init) print('=== Initializing embeddings with pre-trained weights ===') sess.run(set_embeddings_weights, feed_dict={place: w2v_model.syn0}) #.vectors}) print("=== Training Start ===", file=sys.stderr) tr = trange(0, num_iter, desc="iter: nan - loss: nan") for i in tr: labeled_instances, labels, unlabeled_instances = data.train.next_batch( batch_size) _, tloss, lloss = sess.run( [train_step, loss, clean_pred_cost], feed_dict={ feedforward_inputs: labeled_instances, outputs: labels, autoencoder_inputs: unlabeled_instances, training: True }) tr.set_description("loss: %.5g - lloss: %.5g" % (tloss, lloss)) if (i > 1) and ((i + 1) % (num_iter / num_epochs) == 0) and i < num_iter - 1: # Compute train and validation stats for each epoch epoch_n = i // (num_examples // batch_size) + 1 tqdm.write("=== Epoch %d stats ===" % epoch_n, file=sys.stderr) # For training data we traverse in batches and save all the information training_instances = data.train.labeled_ds.instances training_labels = data.train.labeled_ds.labels mean_accuracy = [] mean_loss = [] for start in trange(0, len(training_labels), batch_size): end = min(start + batch_size, len(training_labels)) epoch_stats = sess.run( [accuracy, loss, clean_pred_cost, predictions], feed_dict={ feedforward_inputs: training_instances[start:end], outputs: training_labels[start:end], autoencoder_inputs: unlabeled_instances, training: False }) mean_accuracy.append(epoch_stats[0]) mean_loss.append(epoch_stats[2]) true_labels = np.argmax(training_labels[start:end], 1) for i in np.arange(true_labels.shape[0]): print("%s,training,%d,%.3g,%.3g,%.3g,%d,%d" % (config["experiment_id"], epoch_n, epoch_stats[0], epoch_stats[1], epoch_stats[2], true_labels[i], epoch_stats[3][i]), file=results_log) tqdm.write("Epoch %d: Accuracy for Training Data: %.3g" % (epoch_n, np.mean(mean_accuracy)), file=sys.stderr) tqdm.write("Epoch %d: Supervised Cost for Training Data: %.3g" % (epoch_n, np.mean(mean_loss)), file=sys.stderr) # For validation data we traverse in batches and save all the information validation_instances = data.validation.instances validation_labels = data.validation.labels mean_accuracy = [] mean_loss = [] for start in trange(0, len(validation_labels), batch_size): end = min(start + batch_size, len(validation_labels)) epoch_stats = sess.run( [accuracy, loss, clean_pred_cost, predictions], feed_dict={ feedforward_inputs: validation_instances[start:end], outputs: validation_labels[start:end], autoencoder_inputs: unlabeled_instances, training: False }) mean_accuracy.append(epoch_stats[0]) mean_loss.append(epoch_stats[2]) true_labels = np.argmax(validation_labels[start:end], 1) for i in np.arange(true_labels.shape[0]): print("%s,validation,%d,%.3g,%.3g,%.3g,%d,%d" % (config["experiment_id"], epoch_n, epoch_stats[0], epoch_stats[1], epoch_stats[2], true_labels[i], epoch_stats[3][i]), file=results_log) tqdm.write("Epoch %d: Accuracy for Validation Data: %.3g" % (epoch_n, np.mean(mean_accuracy)), file=sys.stderr) 
tqdm.write("Epoch %d: Supervised Cost for Validation Data: %.3g" % (epoch_n, np.mean(mean_loss)), file=sys.stderr) results_log.flush() decay_after = config['decay_after'] if (epoch_n + 1) >= decay_after: # decay learning rate # learning_rate = starter_learning_rate * ((num_epochs - epoch_n) / (num_epochs - decay_after)) ratio = 1.0 * ( num_epochs - (epoch_n + 1) ) # epoch_n + 1 because learning rate is set for next epoch ratio = max(0, ratio / (num_epochs - decay_after)) sess.run(learning_rate.assign(starter_learning_rate * ratio)) print("=== Final stats ===", file=sys.stderr) epoch_n = num_iter // (num_examples // batch_size) + 1 training_instances = data.train.labeled_ds.instances training_labels = data.train.labeled_ds.labels mean_accuracy = [] mean_loss = [] for start in trange(0, len(training_labels), batch_size): end = min(start + batch_size, len(training_labels)) final_stats = sess.run( [accuracy, loss, clean_pred_cost, predictions], feed_dict={ feedforward_inputs: training_instances[start:end], outputs: training_labels[start:end], autoencoder_inputs: unlabeled_instances, training: False }) mean_accuracy.append(final_stats[0]) mean_loss.append(final_stats[2]) true_labels = np.argmax(training_labels[start:end], 1) for i in np.arange(true_labels.shape[0]): print("%s,training,%d,%.3g,%.3g,%.3g,%d,%d" % (config["experiment_id"], epoch_n, final_stats[0], final_stats[1], final_stats[2], true_labels[i], final_stats[3][i]), file=results_log) print("Final Accuracy for Training Data: %.3g" % np.mean(mean_accuracy), file=sys.stderr) print("Final Supervised Cost for Training Data: %.3g" % np.mean(mean_loss), file=sys.stderr) # For validation data we traverse in batches and save all the information validation_instances = data.validation.instances validation_labels = data.validation.labels mean_accuracy = [] mean_loss = [] for start in trange(0, len(validation_labels), batch_size): end = min(start + batch_size, len(validation_labels)) final_stats = sess.run( [accuracy, loss, clean_pred_cost, predictions], feed_dict={ feedforward_inputs: validation_instances[start:end], outputs: validation_labels[start:end], autoencoder_inputs: unlabeled_instances, training: False }) mean_accuracy.append(final_stats[0]) mean_loss.append(final_stats[2]) true_labels = np.argmax(validation_labels[start:end], 1) for i in np.arange(true_labels.shape[0]): print("%s,validation,%d,%.3g,%.3g,%.3g,%d,%d" % (config["experiment_id"], epoch_n, final_stats[0], final_stats[1], final_stats[2], true_labels[i], final_stats[3][i]), file=results_log) print("Final Accuracy for Validation Data: %.3g" % np.mean(mean_accuracy), file=sys.stderr) print("Final Supervised Cost for Validation Data: %.3g" % np.mean(mean_loss), file=sys.stderr) # TEST DATA test_instances = data.test.instances test_labels = data.test.labels for start in trange(0, len(test_labels), batch_size): end = min(start + batch_size, len(test_labels)) final_stats = sess.run( [accuracy, loss, clean_pred_cost, predictions], feed_dict={ feedforward_inputs: test_instances[start:end], outputs: test_labels[start:end], autoencoder_inputs: unlabeled_instances, training: False }) true_labels = np.argmax(test_labels[start:end], 1) for i in np.arange(true_labels.shape[0]): print("%s,test,%d,%.3g,%.3g,%.3g,%d,%d" % (config["experiment_id"], epoch_n, final_stats[0], final_stats[1], final_stats[2], true_labels[i], final_stats[3][i]), file=results_log) print("=== Experiment finished ===", file=sys.stderr) sess.close() results_log.close() return
def create_spixel(*args):
    try:
        return SuperPixel(*args)
    except ValueError as err:
        tqdm.write("Skipping SuperPixel. " + str(err))
        return None  # make the "skipped" case explicit for callers
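# --- Hedged usage sketch ----------------------------------------------------------
# create_spixel() returns None when SuperPixel raises ValueError, so callers can
# simply drop the skipped entries.  `image` and `labels` are hypothetical inputs.
candidates = (create_spixel(label, image) for label in labels)
spixels = [p for p in candidates if p is not None]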
def train_model(model, datasets, optimizer, criterion, num_epochs=30, batch_size=128, device=None, scheduler=None, out=None): """ train gan(generator, discriminator) with standard gan algorithm Parameters ----------------- models: torch.nn.Module pre-trained model datasets: torch.utils.data.Dataset dataset of image optimizer: torch.optim optimizer for model criterion: torch.nn.Module function that calculates loss num_epochs: int number of epochs batch_size: int number of batch size device: torch.device out: pathlib.Path represent output directory Return ----------------------------- model: torch.nn.Module best model """ epochs = tqdm(range(num_epochs), desc="Epoch", unit='epoch') phases = ['train', 'val'] best_model_wts = copy.deepcopy(model.state_dict()) best_acc = 0.0 # construct dataloader dataloader = {phase: torch.utils.data.DataLoader(datasets[phase], batch_size=batch_size, shuffle=(phase == 'train'), num_workers=2) for phase in ['train', 'val']} dataset_sizes = {phase: len(datasets[phase]) for phase in ['train', 'val']} # initialize log log = OrderedDict() # train loop since = datetime.datetime.now() for epoch in epochs: for phase in phases: if phase == 'train': if scheduler is not None: scheduler.step() model.train() # Set model to training mode else: model.eval() # Set model to evaluate mode train_loss = 0.0 train_acc = 0.0 # Iterate over data. iteration = tqdm(dataloader[phase], desc="{} iteration".format(phase.capitalize()), unit='iter') for inputs, labels in iteration: inputs = inputs.to(device) labels = labels.to(device) # zero the parameter gradients optimizer.zero_grad() # forward # track history if only in train with torch.set_grad_enabled(phase == 'train'): outputs = model(inputs) _, preds = torch.max(outputs, 1) # returns loss is mean_wise loss = criterion(outputs, labels) # backward + optimize only if in training phase if phase == 'train': loss.backward() optimizer.step() # statistics train_loss += loss.item() * inputs.size(0) train_acc += torch.sum(preds == labels.data) epoch_loss = train_loss / dataset_sizes[phase] epoch_acc = train_acc.double().item() / dataset_sizes[phase] tqdm.write('Epoch: {:3d} Phase: {:>5} Loss: {:.4f} Acc: {:.4f}'.format( epoch+1, phase.capitalize(), epoch_loss, epoch_acc)) if phase == 'train': # preserve train log log["epoch_{}".format(epoch+1)] = OrderedDict(train_loss=epoch_loss, train_acc=epoch_acc) elif phase == 'val': # preserve val log log["epoch_{}".format(epoch+1)].update(OrderedDict(val_loss=epoch_loss, val_acc=epoch_acc)) if epoch_acc > best_acc: # deep copy the model best_acc = epoch_acc best_model_wts = copy.deepcopy(model.state_dict()) # save model by epoch torch.save(model.state_dict(), out / "model_{}epoch.pt".format(epoch+1)) tqdm.write("-"*60) time_elapsed = datetime.datetime.now() - since tqdm.write('Training complete in {}'.format(time_elapsed)) tqdm.write('Best val Acc: {:4f}'.format(best_acc), end="\n\n") # load best model weights model.load_state_dict(best_model_wts) # if test set exists, calculate loss and accuracy for best model if "test" in datasets: model.eval() testloader = torch.utils.data.DataLoader(datasets["test"], batch_size=batch_size, shuffle=False, num_workers=2) iteration = tqdm(testloader, desc="Test iteration", unit='iter') test_loss = 0.0 test_acc = 0 with torch.no_grad(): for inputs, labels in iteration: inputs, labels = inputs.to(device), labels.to(device) outputs = model(inputs) _, preds = torch.max(outputs.data, 1) # returns loss is mean_wise loss = criterion(outputs, labels) # statistics test_loss += 
loss.item() * inputs.size(0) test_acc += torch.sum(preds == labels.data) test_loss = test_loss / len(datasets["test"]) test_acc = test_acc.double().item() / len(datasets["test"]) tqdm.write('Phase: {} Loss: {:.4f} Acc: {:.4f}'.format( "Test", test_loss, test_acc), end="\n\n") # preserve test log log['test'] = OrderedDict(test_loss=test_loss, test_acc=test_acc) # save log with open(out / "log.json", "w") as f: json.dump(log, f, indent=4, separators=(',', ': ')) return model
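# --- Hedged usage sketch (dataset, model and hyper-parameters below are
# illustrative assumptions, not part of the original module) -----------------------
import pathlib
import torch
import torchvision
from torchvision import transforms

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tfm = transforms.ToTensor()
datasets = {
    'train': torchvision.datasets.CIFAR10('data', train=True, download=True, transform=tfm),
    'val': torchvision.datasets.CIFAR10('data', train=False, download=True, transform=tfm),
}
model = torchvision.models.resnet18(num_classes=10).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
criterion = torch.nn.CrossEntropyLoss()
out = pathlib.Path('checkpoints')
out.mkdir(exist_ok=True)

best_model = train_model(model, datasets, optimizer, criterion,
                         num_epochs=5, batch_size=128, device=device, out=out)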
def test_loop( self, test_loader, verbose=False, normalizer=None, return_all=False, debug=False, debug_dir=None, ): """ Run a model test loop :param test_loader: torch.utils.data.DataLoader for testing, generated by data.datamgr.SetDataManager :param verbose: if verbose, use tqdm to display progress :param normalizer: a torchvision.transforms.Transform object used to normalize the image before evaluation. Used if debug is set, and we want the original image to save to img file :param return_all: return an np.array of hits (1s or 0s), instead of summary loss/acc statistics :param debug: don't actually evaluate test loop; evaluate a few episodes then save their results in `debug_dir` :param debug_dir: if debug is set, save to this directory :returns: either an (acc, loss) tuple, or an np.array of 1s and 0s, where 1 indicates a correct prediction, for the entire dataset """ acc_all = [] loss_all = [] iter_num = len(test_loader) if verbose: ranger = tqdm(enumerate(test_loader), desc="test", total=len(test_loader)) else: ranger = enumerate(test_loader) for i, (x, target, lang) in ranger: if normalizer is not None: xdim = x.shape xflat = x.clone().view(xdim[0] * xdim[1], *xdim[2:]) xnorm = torch.stack([normalizer(x) for x in xflat]) xnorm = xnorm.view(*xdim) else: xnorm = x self.n_query = x.size(1) - self.n_support if self.l3: correct_this, count_this, loss_this = self.correct_l3( xnorm, return_loss=True, debug=debug, index=i, x_orig=x, debug_dir=debug_dir, ) else: correct_this, count_this, loss_this = self.correct( xnorm, return_loss=True) acc_all.append(correct_this / count_this * 100) loss_all.append(loss_this.item()) acc_all = np.asarray(acc_all) loss_all = np.asarray(loss_all) acc_mean = np.mean(acc_all) loss_mean = np.mean(loss_all) acc_std = np.std(acc_all) tqdm.write("%d Test Loss %f Acc = %4.2f%% +- %4.2f%%" % (iter_num, loss_mean, acc_mean, 1.96 * acc_std / np.sqrt(iter_num))) if return_all: return acc_all return acc_mean, loss_mean
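# --- Hedged usage sketch ----------------------------------------------------------
# `normalizer` is applied image-by-image inside test_loop, which is convenient
# when the loader yields unnormalized images (e.g. so debug dumps stay viewable).
# The `model` and `test_loader` objects are assumed; with return_all=True the
# method returns the per-episode accuracy array built above instead of (acc, loss).
from torchvision import transforms

imagenet_norm = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
acc_mean, loss_mean = model.test_loop(test_loader, verbose=True, normalizer=imagenet_norm)
per_episode_acc = model.test_loop(test_loader, return_all=True)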
def _iter_metadata(**kwargs):
    for prefix, data in iter_helper_helper(get_metadata, **kwargs):
        version = data["version"]
        tqdm.write(f"[{prefix}] using version {version}")
        yield prefix, version, data["date"], bioregistry.is_deprecated(prefix)
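# --- Hedged usage sketch ----------------------------------------------------------
# _iter_metadata() yields (prefix, version, date, deprecated) tuples; collecting
# them into a DataFrame is one convenient sink (pandas here is an assumption).
import pandas as pd

rows = pd.DataFrame(list(_iter_metadata()),
                    columns=['prefix', 'version', 'date', 'deprecated'])
print(rows.head())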
def speech_length_histogram( sessions: Iterable[int], histogram_upper_bound: int = 50, metadata_of_interest: Set[str] = {'party', 'chamber', 'gender', 'state'}, identities: Set[str] = {'Dem', 'GOP', 'Senate', 'House', 'Male', 'Female'} ) -> None: speeches_length: defaultdict[str, List[int]] = defaultdict(list) for session_index in tqdm(sessions): metadata: Dict[str, Dict[str, str]] = dict() metadata_path = f'corpora/bound/{session_index:0>3d}_SpeakerMap.txt' with open(metadata_path) as metadata_file: reader = csv.DictReader(metadata_file, delimiter='|') for speaker_data in reader: if speaker_data['nonvoting'] == 'nonvoting': continue speaker: Dict[str, str] = { attribute: speaker_data[attribute] for attribute in metadata_of_interest} metadata[speaker_data['speech_id']] = speaker speech_count = 0 missing_metadata_count = 0 corpus_path = f'corpora/bound/speeches_{session_index:0>3d}.txt' with open(corpus_path, encoding=input_encoding) as corpus_file: corpus_file.readline() # discard header line for line in corpus_file: try: speech_id, speech = line.split('|') speech_count += 1 if speech_id not in metadata: missing_metadata_count += 1 continue speaker = metadata[speech_id] party = speaker['party'] chamber = speaker['chamber'] gender = speaker['gender'] state = speaker['state'] speech_length = len(speech.split()) speeches_length[state].append(speech_length) if party == 'D': speeches_length['Dem'].append(speech_length) elif party == 'R': speeches_length['GOP'].append(speech_length) # else: # print('Spoiler effect:', party) if chamber == 'S': speeches_length['Senate'].append(speech_length) elif chamber == 'H': speeches_length['House'].append(speech_length) else: print('Bicameralism is bad enough:', chamber) if gender == 'M': speeches_length['Male'].append(speech_length) elif gender == 'F': speeches_length['Female'].append(speech_length) else: print('Nonbinary:') except ValueError: # from spliting line with '|' continue missing_metadata_ratio = missing_metadata_count / speech_count tqdm.write(f'{missing_metadata_ratio:.2%} speeches in {corpus_path} ' 'are missing metadata and excluded from the output corpus.') for metadata_name in identities: bounded_lengths = [length for length in speeches_length[metadata_name] if length < histogram_upper_bound] if len(bounded_lengths) == 0: raise ValueError(f'{metadata_name} is empty?') fig, ax = plt.subplots() ax = sns.distplot(bounded_lengths, label=metadata_name) ax.legend() fig.savefig(f'graphs/speech_length/{metadata_name}.pdf')
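# --- Hedged usage sketch ----------------------------------------------------------
# Plot length histograms for a range of Congressional sessions; the session
# numbers and the narrower second call are illustrative assumptions.
speech_length_histogram(range(97, 112))
speech_length_histogram([111], histogram_upper_bound=30,
                        identities={'Senate', 'House'})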
def run(run_obj): """ Function to run FastSinkSource, FastSinkSourcePlus, Local and LocalPlus *terms_to_run*: terms for which to run the method. Must be a subset of the terms present in the ann_obj """ params_results = run_obj.params_results P, alg, params = run_obj.P, run_obj.name, run_obj.params #if 'solver' in params: # make sure the term_scores matrix is reset # because if it isn't empty, overwriting the stored scores seems to be time consuming term_scores = sp.lil_matrix(run_obj.ann_matrix.shape, dtype=np.float) print("Running %s with these parameters: %s" % (alg, params)) if len(run_obj.target_prots) != len(run_obj.net_obj.nodes): print("\tstoring scores for only %d target prots" % (len(run_obj.target_prots))) # run FastSinkSource on each term individually #for i in trange(run_obj.ann_matrix.shape[0]): #term = run_obj.terms[i] for term in tqdm(run_obj.terms_to_run): idx = run_obj.ann_obj.term2idx[term] # get the row corresponding to the current terms annotations y = run_obj.ann_matrix[idx, :] positives = (y > 0).nonzero()[1] negatives = (y < 0).nonzero()[1] # if this method uses positive examples only, then remove the negative examples if alg in ["fastsinksourceplus", "sinksourceplus", "localplus"]: negatives = None if run_obj.net_obj.weight_gmw is True: start_time = time.process_time() # weight the network for each term individually W, _, _ = run_obj.net_obj.weight_GMW(y.toarray()[0], term) P = alg_utils.normalizeGraphEdgeWeights( W, ss_lambda=params.get('lambda')) params_results['%s_weight_time' % (alg)] += time.process_time() - start_time # now actually run the algorithm if alg in [ "fastsinksource", "fastsinksourceplus", "sinksource", "sinksourceplus" ]: a, eps, max_iters = params['alpha'], float( params['eps']), params['max_iters'] # if a solver is given, it will be used. Otherwise it will use regular power iteration solver = params.get('solver') tol = float(params['tol']) if 'tol' in params else 1e-5 scores, process_time, wall_time, iters = fastsinksource.runFastSinkSource( P, positives, negatives=negatives, max_iters=max_iters, eps=eps, a=a, tol=tol, solver=solver, verbose=run_obj.kwargs.get('verbose', False)) elif alg in ["local", "localplus"]: scores, process_time, wall_time = fastsinksource.runLocal( P, positives, negatives=negatives) iters = 1 if run_obj.kwargs.get('verbose', False) is True: tqdm.write("\t%s converged after %d iterations " % (alg, iters) + "(%0.4f sec) for %s" % (process_time, term)) # limit the scores to the target nodes if len(run_obj.target_prots) != len(scores): #print("\tstoring results for %d target prots" % (len(run_obj.target_prots))) mask = np.ones(len(scores), np.bool) mask[run_obj.target_prots] = False scores[mask] = 0 # 0s are not explicitly stored in lil matrix term_scores[idx] = scores # also keep track of the time it takes for each of the parameter sets alg_name = "%s%s" % (alg, run_obj.params_str) params_results["%s_wall_time" % alg_name] += wall_time params_results["%s_process_time" % alg_name] += process_time run_obj.term_scores = term_scores run_obj.params_results = params_results return
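# --- Hedged standalone illustration of the masking step above ---------------------
# Zero every score except the target nodes before storing the row in the sparse
# lil_matrix (zeros are simply not stored).  Note that np.bool / np.float used
# above are deprecated aliases in recent NumPy; the builtins bool / float behave
# the same here.
import numpy as np

scores = np.array([0.9, 0.1, 0.4, 0.7, 0.2])
target_prots = np.array([0, 3])
mask = np.ones(len(scores), dtype=bool)
mask[target_prots] = False
scores[mask] = 0
print(scores)   # -> [0.9 0.  0.  0.7 0. ]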
def write(cls, msg):
    tqdm.write(msg, end='')
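# --- Hedged usage sketch ----------------------------------------------------------
# A class exposing write() like this can stand in for a stream object, so that log
# records are emitted through tqdm.write and do not corrupt an active progress bar.
# `TqdmStream` is an assumed name for the enclosing class, and write() is assumed
# to be a classmethod, as its (cls, msg) signature suggests.
import logging
from tqdm import tqdm

logging.basicConfig(stream=TqdmStream, level=logging.INFO, format='%(message)s')
for i in tqdm(range(3)):
    logging.info('step %d', i)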
def main(): # Init logger if not os.path.isdir(args.save_path): os.makedirs(args.save_path) print('Dataset: {}'.format(args.dataset.upper())) if args.dataset == "seedlings" or args.dataset == "bone": classes, class_to_idx, num_to_class, df = GenericDataset.find_classes( args.data_path) if args.dataset == "ISIC2017": classes, class_to_idx, num_to_class, df = GenericDataset.find_classes_melanoma( args.data_path) df.head(3) args.num_classes = len(classes) # Init model, criterion, and optimizer # net = models.__dict__[args.arch](num_classes) # net= kmodels.simpleXX_generic(num_classes=args.num_classes, imgDim=args.imgDim) # net= kmodels.vggnetXX_generic(num_classes=args.num_classes, imgDim=args.imgDim) # net= kmodels.vggnetXX_generic(num_classes=args.num_classes, imgDim=args.imgDim) net = kmodels.dpn92(num_classes=args.num_classes) # net= kmodels.inception_v3(num_classes=args.num_classes) # print_log("=> network :\n {}".format(net), log) real_model_name = (type(net).__name__) print("=> Creating model '{}'".format(real_model_name)) # if real_model_name is "Inception3": # net = inception_v3(pretrained=True) # net.fc = nn.Linear(2048, args.num_classes) import datetime exp_name = datetime.datetime.now().strftime(real_model_name + '_' + args.dataset + '_%Y-%m-%d_%H-%M-%S') print('Training ' + real_model_name + ' on {} dataset:'.format(args.dataset.upper())) mPath = args.save_path + '/' + args.dataset + '/' + real_model_name + '/' args.save_path_model = mPath if not os.path.isdir(args.save_path_model): os.makedirs(args.save_path_model) log = open(os.path.join(mPath, 'seed_{}.txt'.format(args.manualSeed)), 'w') print_log('save path : {}'.format(args.save_path), log) state = {k: v for k, v in args._get_kwargs()} print_log(state, log) print("Random Seed: {}".format(args.manualSeed)) print("python version : {}".format(sys.version.replace('\n', ' '))) print("torch version : {}".format(torch.__version__)) print("cudnn version : {}".format(torch.backends.cudnn.version())) # Init dataset if not os.path.isdir(args.data_path): os.makedirs(args.data_path) normalize_img = torchvision.transforms.Normalize( mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) train_trans = transforms.Compose([ transforms.RandomSizedCrop(args.img_scale), PowerPIL(), transforms.ToTensor(), # normalize_img, RandomErasing() ]) ## Normalization only for validation and test valid_trans = transforms.Compose([ transforms.Scale(256), transforms.CenterCrop(args.img_scale), transforms.ToTensor(), # normalize_img ]) test_trans = valid_trans train_data = df.sample(frac=args.validationRatio) valid_data = df[~df['file'].isin(train_data['file'])] train_set = GenericDataset(train_data, args.data_path, transform=train_trans) valid_set = GenericDataset(valid_data, args.data_path, transform=valid_trans) t_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, num_workers=0) v_loader = DataLoader(valid_set, batch_size=args.batch_size, shuffle=True, num_workers=0) # test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=4) dataset_sizes = { 'train': len(t_loader.dataset), 'valid': len(v_loader.dataset) } print(dataset_sizes) # net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu))) criterion = torch.nn.CrossEntropyLoss() # optimizer = torch.optim.SGD(net.parameters(), state['learning_rate'], momentum=state['momentum'], # weight_decay=state['decay'], nesterov=True) # optimizer = torch.optim.Adam(net.parameters(), lr=args.learning_rate) optimizer = torch.optim.SGD(net.parameters(), 
state['learning_rate'], momentum=state['momentum'], weight_decay=state['decay'], nesterov=True) # optimizer = torch.optim.Adam(net.parameters(), lr=state['learning_rate']) if args.use_cuda: net.cuda() criterion.cuda() recorder = RecorderMeter(args.epochs) # optionally resume from a checkpoint if args.evaluate: validate(v_loader, net, criterion, log) return if args.tensorboard: configure("./logs/runs/%s" % (exp_name)) print(' Total params: %.2fM' % (sum(p.numel() for p in net.parameters()) / 1000000.0)) # Main loop start_training_time = time.time() training_time = time.time() start_time = time.time() epoch_time = AverageMeter() for epoch in tqdm(range(args.start_epoch, args.epochs)): current_learning_rate = adjust_learning_rate(optimizer, epoch, args.gammas, args.schedule) need_hour, need_mins, need_secs = convert_secs2time( epoch_time.avg * (args.epochs - epoch)) need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format( need_hour, need_mins, need_secs) print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f}]'.format(time_string(), epoch, args.epochs, need_time, current_learning_rate) \ # print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f}]'.format(time_string(), epoch, args.epochs, need_time, current_learning_rate) \ + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(recorder.max_accuracy(False), 100-recorder.max_accuracy(False)), log) tqdm.write( '\n==>>Epoch=[{:03d}/{:03d}]], {:s}, LR=[{}], Batch=[{}]'.format( epoch, args.epochs, time_string(), state['learning_rate'], args.batch_size) + ' [Model={}]'.format( (type(net).__name__), ), log) # train for one epoch train_acc, train_los = train(t_loader, net, criterion, optimizer, epoch, log) val_acc, val_los = validate(v_loader, net, criterion, epoch, log) is_best = recorder.update(epoch, train_los, train_acc, val_los, val_acc) # measure elapsed time epoch_time.update(time.time() - start_time) start_time = time.time() training_time = time.time() - start_training_time recorder.plot_curve( os.path.join(mPath, real_model_name + '_' + exp_name + '.png'), training_time, net, real_model_name, dataset_sizes, args.batch_size, args.learning_rate, args.dataset, args.manualSeed, args.num_classes) if float(val_acc) > float(95.0): print("*** EARLY STOP ***") df_pred = testSeedlingsModel(args.test_data_path, net, num_to_class, test_trans) model_save_path = os.path.join( mPath, real_model_name + '_' + str(val_acc) + '_' + str(val_los) + "_" + str(epoch)) df_pred.to_csv(model_save_path + "_sub.csv", columns=('file', 'species'), index=None) torch.save(net.state_dict(), model_save_path + '_.pth') save_checkpoint( { 'epoch': epoch + 1, # 'arch': args.arch, 'state_dict': net.state_dict(), 'recorder': recorder, 'optimizer': optimizer.state_dict(), }, is_best, mPath, str(val_acc) + '_' + str(val_los) + "_" + str(epoch) + '_checkpoint.pth.tar') log.close()
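# --- Hedged sketch (assumption, not the original helper) --------------------------
# adjust_learning_rate() is called above with (optimizer, epoch, gammas, schedule)
# and its result is reported as the current learning rate.  A common implementation
# of that pattern multiplies the base LR by every gamma whose schedule milestone
# has been reached and writes the result into the optimizer's parameter groups.
def adjust_learning_rate_sketch(optimizer, epoch, gammas, schedule, base_lr):
    lr = base_lr
    for gamma, milestone in zip(gammas, schedule):
        if epoch >= milestone:
            lr *= gamma
    for group in optimizer.param_groups:
        group['lr'] = lr
    return lr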