from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold from sklearn.ensemble import RandomForestClassifier, VotingClassifier from lightgbm import LGBMClassifier from xgboost import XGBClassifier from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, precision_recall_curve, roc_curve, auc from imblearn.over_sampling import SMOTE, BorderlineSMOTE, SVMSMOTE, ADASYN import matplotlib.pyplot as plt plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei'] plt.rcParams['axes.unicode_minus'] = False from matplotlib.font_manager import FontProperties import seaborn as sns myfont = FontProperties(fname='Microsoft JhengHei', size=14) sns.set(font=myfont.get_family()) sns.set_style("darkgrid", {"font.sans-serif": ['Microsoft JhengHei']}) import matplotlib.pyplot as plt plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei'] plt.rcParams['axes.unicode_minus'] = False from matplotlib.font_manager import FontProperties import seaborn as sns myfont = FontProperties(fname='Microsoft JhengHei', size=14) sns.set(font=myfont.get_family()) sns.set_style("darkgrid", {"font.sans-serif": ['Microsoft JhengHei']}) # generate 2 class dataset X, y = make_classification(n_samples=1000,
from traits.api import HasTraits
from traitsui.api import Item, View
from traitsui.menu import OKButton, CancelButton

# Installation directory of this package, its bundled "Lib" directory and the
# per-user configuration directory.
__QS_MainPath__ = os.path.split(os.path.realpath(__file__))[0]
__QS_LibPath__ = __QS_MainPath__+os.sep+"Lib"
__QS_ConfigPath__ = os.path.expanduser("~")+os.sep+"QuantStudioConfig"

# Select a matplotlib font per platform; presumably so CJK text renders in
# figures (SimHei on Windows, Arial Unicode on macOS when installed).
from matplotlib.pylab import mpl
if platform.system()=="Windows":
    mpl.rcParams['font.sans-serif'] = ["SimHei"]
elif platform.system()=="Darwin":
    if os.path.isfile("/Library/Fonts/Arial Unicode.ttf"):
        from matplotlib.font_manager import FontProperties
        Font = FontProperties(fname="/Library/Fonts/Arial Unicode.ttf")
        mpl.rcParams["font.family"] = Font.get_family()
        mpl.rcParams["font.sans-serif"] = Font.get_name()
# Draw the minus sign with an ASCII hyphen instead of the Unicode minus glyph.
mpl.rcParams['axes.unicode_minus'] = False


# Quant Studio system error
class __QS_Error__(Exception):
    """Quant Studio error."""
    pass


# Quant Studio system object
class __QS_Object__(HasTraits):
    """Quant Studio system object."""

    def __init__(self, sys_args=None, config_file=None, **kwargs):
        """Set up the logger and initialise the underlying ``HasTraits``.

        Args:
            sys_args: Optional mapping of arguments. ``None`` (the default)
                is treated as an empty dict. It is not consumed by the
                portion of the initialiser visible here.
            config_file: Optional configuration file. It is not consumed by
                the portion of the initialiser visible here.
            **kwargs: Forwarded to ``HasTraits.__init__``. A ``logger`` entry,
                if present, is removed first and stored on the instance.
        """
        # A fresh dict per call avoids the shared-mutable-default pitfall
        # while keeping ``sys_args`` a dict for any code that reads it.
        if sys_args is None:
            sys_args = {}
        self._QS_Logger = kwargs.pop("logger", None)
        if self._QS_Logger is None:
            # Fall back to the root logger when the caller supplies none.
            self._QS_Logger = logging.getLogger()
        super().__init__(**kwargs)
class TrainAndEvaluate:
    """Build, train and evaluate a model from a hyperparameter dictionary.

    The hyperparameter dictionary is read for these keys: ``"exp_name"``,
    ``"dataloader"``, ``"dataloader_params"``, ``"tiny_transforms"``,
    ``"clear_redis"``, ``"model"``, ``"criterion"``, ``"epochs"``,
    ``"starting_epoch"``, ``"viz_attention"`` and ``"number_of_figures"``.
    Inside :meth:`run` the same values are also exposed via ``wandb.config``
    as ``self.config`` (``lr``, ``weight_decay``, ``starting_epoch``,
    ``epochs``, ``project_path``, ``half_precision``).
    """

    def __init__(self, hyperparameters, seed=0, eval=False, **kwargs) -> None:
        """Store the configuration and set up fonts, device, RNG and reporter.

        Args:
            hyperparameters: Dictionary of settings (see the class docstring).
            seed: Seed for the ``numpy`` random state kept in ``self.rng``.
            eval: When true, :meth:`run` skips training and only evaluates.
            **kwargs: Accepted for call compatibility; unused here.
        """
        print(hyperparameters)

        # setup matplotlib fonts
        # NOTE(review): the extension is ".tff", not ".ttf" -- confirm the
        # file on disk really has this name.
        self.prop = FontProperties(fname="NotoColorEmoji.tff")
        plt.rcParams['font.family'] = self.prop.get_family()

        self.eval = eval

        # setup device
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # setup memory reporter
        self.reporter = MemReporter()

        # setup random seed:
        self.rng = np.random.RandomState(seed=seed)
        self.hyperparameters = hyperparameters

    def run(self):
        """Run the pipeline inside a wandb run and return the model.

        Builds everything with :meth:`make`, trains unless ``self.eval`` is
        set, and finally evaluates on the test split.
        """
        with wandb.init(project="bike-1b", config=self.hyperparameters,
                        name=self.hyperparameters["exp_name"], save_code=True):
            # access all HPs through wandb.config, so logging matches execution!
            self.config = wandb.config

            # make the model, data, and optimization problem
            self.make()

            torch.cuda.empty_cache()
            self.reporter.report()

            # and use them to train the model
            if self.eval == False:
                self.train()

            # and test its final performance
            print("testing:")
            self.evaluate(dataset='test')
        return self.model

    def make(self):
        """Create data loaders, model, criterion, optimizer and LR scheduler.

        When ``config.starting_epoch > 0`` the model and optimizer state are
        restored from ``<project_path>/models/model_<starting_epoch>.tar``.
        """
        make_loader = self.hyperparameters["dataloader"]
        loader_params = self.hyperparameters["dataloader_params"]

        # Make the data
        self.train_loader = make_loader(data_set_type='train', **loader_params)
        self.test_loader = make_loader(data_set_type='test', **loader_params)
        self.val_loader = make_loader(data_set_type="val", **loader_params)
        # Four-sample validation loader used only for visualisation.
        self.tiny_val_loader = make_loader(
            root=loader_params["root"],
            data_set_type="val",
            data_set_size=4,
            normalize=True,
            balance=0.5,
            num_workers=20,
            data_splits={"val": 1.0},
            prefetch_factor=1,
            batch_size=4,
            transforms=self.hyperparameters["tiny_transforms"],
            shuffle=False,
        )

        for name, loader in zip(["train", "val", "test"],
                                [self.train_loader, self.val_loader, self.test_loader]):
            print(f"{name} loader stats:\t number of pairs: {len(loader.dataset)}\t")
            print(f"number of positive pairs: \t {loader.dataset.num_same_ad}")
            print(f"number of negative pairs: \t {loader.dataset.num_diff_ad}")
            print(f"number of Ads used: \t {len(loader.dataset.ad_to_img.keys())}")
            print("#"*5)
        print(f"Training set size: {len(self.train_loader.dataset)}")

        if self.hyperparameters["clear_redis"] == True:
            print("flushing redis. Expect a slower first epoch :(")
            self.train_loader.flush_redis()

        # filepaths to small batch of images to visualise the backbone layer outputs
        tiny_dataset = self.tiny_val_loader.dataset
        self.tiny_filepaths = tiny_dataset.same_ad_filenames + tiny_dataset.diff_ad_filenames
        # Flatten the sequence of filename groups into one flat list.
        self.tiny_filepaths = list(chain.from_iterable(self.tiny_filepaths))

        tiny_image_as, tiny_image_bs, _ = next(iter(self.tiny_val_loader))
        # Flatten batch of image pairs to batch of single images,
        # interleaved as a0, b0, a1, b1, ...
        image_list = [torch.unsqueeze(x, 0)
                      for x in chain.from_iterable(zip(tiny_image_as, tiny_image_bs))]
        self.tiny_batch = torch.cat(image_list)

        # Make the model
        self.model = self.hyperparameters["model"](**self.config)

        # Make the loss and optimizer. Some criteria take the hyperparameters
        # as keyword arguments and others take none, so fall back to a
        # no-argument construction. ``Exception`` (rather than a bare
        # ``except``) lets KeyboardInterrupt/SystemExit propagate.
        try:
            self.criterion = self.hyperparameters["criterion"](**self.hyperparameters)
        except Exception:
            self.criterion = self.hyperparameters["criterion"]()
        self.base_optimizer = Adam(self.model.parameters(), lr=self.config.lr,
                                   weight_decay=self.config.weight_decay)

        # load weights and optimizer state if continuing:
        if self.config.starting_epoch > 0:
            path = self.config.project_path
            checkpoint = torch.load(
                join(path, "models", f"model_{self.config.starting_epoch}.tar"))
            self.model.load_state_dict(checkpoint["model_state_dict"])
            self.base_optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
            # The configured lr / weight decay override the checkpointed ones.
            for group in self.base_optimizer.param_groups:
                group['lr'] = self.config.lr
                group["weight_decay"] = self.config.weight_decay

        # make weights half precision if told to
        if self.config.half_precision:
            self.model.half()
            # make sure bn layers are floats for stability
            for layer in self.model.modules():
                if isinstance(layer, nn.BatchNorm2d):
                    layer.float()

        self.model.to(self.device)
        self.move_base_optimizer_to_device()

        # NOTE: despite its name, ``self.optimizer`` is the LR scheduler;
        # the actual optimizer is ``self.base_optimizer``.
        self.optimizer = CosineAnnealingLR(self.base_optimizer, last_epoch=-1,
                                           T_max=self.hyperparameters["epochs"],
                                           eta_min=0.00002)
        # Advance the schedule once per already-completed epoch.
        for _ in range(self.hyperparameters["starting_epoch"]):
            self.optimizer.step()

    def train(self):
        """Train from ``config.starting_epoch`` to ``config.epochs``.

        Training metrics are tracked every 10 batches and the validation
        split is evaluated after every epoch.
        """
        wandb.watch(self.model, self.criterion, log="all", log_freq=10)

        # Run training and track with wandb
        example_seen = 0  # number of examples seen
        batch_seen = 0
        for epoch in range(self.config.starting_epoch, self.config.epochs):
            self.model.train()
            self.current_epoch = epoch
            with tqdm(total=len(self.train_loader), ncols=120) as pbar_train:
                for data in self.train_loader:
                    torch.cuda.empty_cache()
                    self.image_as = data[0].to(self.device)
                    self.image_bs = data[1].to(self.device)
                    labels = data[2].to(self.device)
                    loss, outputs = self.train_batch([self.image_as, self.image_bs, labels])
                    example_seen += data[0].shape[0]
                    batch_seen += 1

                    # Report metrics every 10 batches
                    if batch_seen % 10 == 0:
                        self.model.track_metrics(outputs, epoch, step=example_seen,
                                                 criterion=self.criterion, loss=loss,
                                                 split="train")
                    pbar_train.update(1)
                    pbar_train.set_description(f" Epoch: {epoch} loss: {loss:.4f}")

            # validate
            torch.cuda.empty_cache()
            self.evaluate(dataset='val', epoch=epoch)

    def train_batch(self, data):
        """Run one optimisation step on a single batch.

        Args:
            data: ``[image_as, image_bs, labels]`` already on ``self.device``.

        Returns:
            ``(loss, outputs)`` where ``loss`` is a Python float and
            ``outputs`` is a two-element list whose second item is the labels
            moved to the CPU.

        Raises:
            Exception: If ``hyperparameters["model"]`` is neither
                ``BaselineModel_1b`` nor ``BaselineModel_1a``.
        """
        loss, outputs, labels = self.model.train_batch(
            data, self.criterion, self.device, self.model)

        # backward pass:
        self.base_optimizer.zero_grad()
        loss.backward()
        # The scheduler is stepped with the current epoch before the optimizer
        # step; this order is kept deliberately so the learning rate applied
        # to each batch is unchanged.
        self.optimizer.step(epoch=self.current_epoch)
        self.base_optimizer.step()

        if self.hyperparameters["model"] == BaselineModel_1b:
            return loss.detach().item(), [
                [outputs[0].detach().cpu(), outputs[1].detach().cpu()],
                labels.detach().cpu(),
            ]
        elif self.hyperparameters["model"] == BaselineModel_1a:
            return loss.detach().item(), [outputs, labels.detach().cpu()]
        else:
            raise Exception("Splat")

    def evaluate(self, dataset="val", epoch=None):
        """Evaluate the model on the validation or test split.

        For ``dataset == "val"`` the aggregated outputs are passed to
        ``model.track_metrics`` and a checkpoint with the model and optimizer
        state is written to ``<project_path>/models/model_<epoch>.tar``.
        For ``dataset == "test"`` they are passed to
        ``model.track_extra_metrics``.

        Args:
            dataset: ``"val"`` uses the validation loader; any other value
                uses the test loader.
            epoch: Epoch index used for logging and the checkpoint filename.
        """
        path = self.config.project_path

        # NOTE(review): nothing in this method switches the model to eval
        # mode; presumably ``model.evaluate_batch`` does so -- confirm.
        losses = []
        first_batch = True
        list_of_image_a_outputs = None
        list_of_image_b_outputs = None
        list_of_labels = None

        # Visualise attention maps of the model
        if self.hyperparameters["viz_attention"]:
            self.model.am_viz(self.tiny_batch, self.tiny_filepaths)

        loader = self.val_loader if dataset == "val" else self.test_loader
        with torch.no_grad():
            for data in loader:
                torch.cuda.empty_cache()
                self.image_as = data[0].to(self.device)
                self.image_bs = data[1].to(self.device)
                labels = data[2].to(self.device)
                loss, _accuracy, outputs = self.model.evaluate_batch(
                    [self.image_as, self.image_bs, labels],
                    self.criterion, self.device, self.model)

                if first_batch:
                    list_of_image_a_outputs = outputs[0].cpu()
                    list_of_image_b_outputs = outputs[1].cpu()
                    list_of_labels = data[2].cpu()
                    # Figures are produced from the first batch only.
                    if self.hyperparameters["model"] == BaselineModel_1a:
                        self.model.visualize(
                            data, outputs[0], epoch,
                            number_of_figures=self.hyperparameters["number_of_figures"],
                            unNormalizer=UnNormalize(loader.means, loader.stds))
                    first_batch = False
                else:
                    list_of_image_a_outputs = torch.cat(
                        (list_of_image_a_outputs, outputs[0].cpu()), 0)
                    # Fixed: this used to append outputs[0] again, so from the
                    # second batch on the image-B slot held image-A outputs.
                    list_of_image_b_outputs = torch.cat(
                        (list_of_image_b_outputs, outputs[1].cpu()), 0)
                    list_of_labels = torch.cat((list_of_labels, data[2].cpu()), 0)
                losses.append(loss)

        list_of_outputs = [[list_of_image_a_outputs, list_of_image_b_outputs],
                           list_of_labels]

        if dataset == "val":
            self.model.track_metrics(list_of_outputs, epoch, step=epoch,
                                     criterion=self.criterion,
                                     loss=np.mean(losses), split="val")

            # Save the model
            actual_path = join(path, "models")
            os.makedirs(actual_path, exist_ok=True)
            # save weights and optimizer
            torch.save({
                "epoch": epoch,
                "model_state_dict": self.model.state_dict(),
                "optimizer_state_dict": self.base_optimizer.state_dict()
            }, join(path, "models", f"model_{epoch}.tar"))

        if dataset == "test":
            self.model.track_extra_metrics(list_of_outputs, epoch, split="test")

    def move_base_optimizer_to_device(self):
        """Move every tensor in the base optimizer's state to ``self.device``.

        Handles tensors stored directly in the state as well as tensors held
        one level down in per-parameter dictionaries, including their
        gradients when present.
        """
        def _move(tensor):
            # Relocate a tensor's data (and gradient, if any) in place.
            tensor.data = tensor.data.to(self.device)
            if tensor._grad is not None:
                tensor._grad.data = tensor._grad.data.to(self.device)

        for param in self.base_optimizer.state.values():
            # Not sure there are any global tensors in the state dict
            if isinstance(param, torch.Tensor):
                _move(param)
            elif isinstance(param, dict):
                for subparam in param.values():
                    if isinstance(subparam, torch.Tensor):
                        _move(subparam)
#%% import json import re import matplotlib.pyplot as plt import pandas as pd import matplotlib.dates as mdates from wordcloud import WordCloud, STOPWORDS from matplotlib.font_manager import FontProperties plt.rc('font', family='DejaVu Sans') plt.rc('font', serif='Helvetica Neue') plt.rc('text', usetex='false') prop = FontProperties(fname='/System/Library/Fonts/Apple Color Emoji.ttc') plt.rcParams['font.family'] = prop.get_family() plt.rcParams.update({'font.size': 10}) plt.rcParams['figure.figsize'] = [15, 10] def fixHex(m): hexs = m.string[m.start():m.end()].replace("\\x", "") hexStr = bytes.fromhex(hexs).decode('utf-8') return hexStr def fixHexStr(s): return re.sub(r'\\x(f0)(?:\\x(..))+', lambda m: fixHex(m), bytes(s, 'unicode-escape').decode('utf-8')) def messagesPer(messages, freq): s = pd.DataFrame(messages) s['timestamp'] = s.apply(
class Chart:
    """Base chart that renders a matplotlib figure to a temporary file.

    Rendering options live in ``self.props``: built-in defaults first,
    overridden by ``subdefaults`` and then by ``props``. This base class
    draws only a "No data." / "No legend." placeholder.
    """

    def __init__(self, props, subdefaults):
        """Merge the property layers and build the font objects.

        Args:
            props: Mapping of caller-supplied properties (highest priority).
            subdefaults: Mapping of defaults that override the built-in ones.
        """
        self.file = None
        self.filename = None
        self.props = {
            "imageType": "png",         # Output image type.
            "dpi": 72,                  # Image DPI resolution.
            "width": 800,               # Output image width in pixels (vs. DPI).
            "height": 600,              # Output image height in pixels (vs. DPI).
            "padding": 50,              # Padding around the figure edge, in pixels.
            "textPadding": 3,           # Padding around text, in pixels.
            "fontFamily": "sans-serif", # Font family.
            "fontName": "Luxi Sans",    # Font name.
            "fontSize": 12,             # Size of non-title text, in pixels.
            "titleFontName": "Bitstream Vera Sans",
            "titleFontSize": 18,        # Size of title text, in pixels.
            "subtitleFontSize": 14,     # Size of subtitle text, in pixels.
            "axtitleFontSize": 16,      # Size of axis title text, in pixels.
            "xAxisScale": "lin",        # X axis scale, lin or log.
            "yAxisScale": "lin",        # Y axis scale, lin or log.
            "square": False,            # Force square layout.
            "integral": False,          # Force integral display on legend.
            "notitle": False,           # Suppress title.
            "nolabels": False,          # Suppress axis labels.
            "sort": False,              # Sort items and legend entries by value.
            "max_legend": 100           # Maximum items in the legend.
        }
        # ``dict.update`` works on Python 2 and 3 alike (``iteritems`` exists
        # only on Python 2). Later layers win.
        self.props.update(subdefaults)
        self.props.update(props)

        # Body text font.
        self.font = FontProperties()
        self.font.set_family(self.get('fontFamily'))
        self.font.set_name(self.get('fontName'))
        self.font.set_size(float(self.get('fontSize')))

        # Title font; falls back to the body font's settings.
        self.tfont = FontProperties()
        self.tfont.set_family(self.get('titleFontFamily', self.font.get_family()[-1]))
        self.tfont.set_name(self.get('titleFontName', self.font.get_name()))
        self.tfont.set_size(self.get('titleFontSize', self.font.get_size()))
        self.tfont.set_weight('bold')

        # Subtitle font; falls back to the title font's settings.
        self.sfont = FontProperties()
        self.sfont.set_family(self.get('subtitleFontFamily', self.tfont.get_family()[-1]))
        self.sfont.set_name(self.get('subtitleFontName', self.tfont.get_name()))
        self.sfont.set_size(self.get('subtitleFontSize', self.tfont.get_size()))

        # Axis-title font; falls back to the title font's settings.
        self.afont = FontProperties()
        self.afont.set_family(self.get('axtitleFontFamily', self.tfont.get_family()[-1]))
        self.afont.set_name(self.get('axtitleFontName', self.tfont.get_name()))
        self.afont.set_size(self.get('axtitleFontSize', self.tfont.get_size()))
        self.afont.set_weight('bold')

    def __del__(self):
        """Close the temporary file descriptor if one is still open."""
        # ``getattr`` keeps this safe for instances whose ``__init__`` never
        # ran or failed before ``self.file`` was assigned.
        fd = getattr(self, "file", None)
        if fd is not None:
            os.close(fd)

    def get(self, key, default=None):
        """Return the setting named ``key``.

        An attribute of that name on the object takes precedence; otherwise
        the value comes from ``self.props``, and ``default`` is returned when
        neither has it.
        """
        return getattr(self, key, self.props.get(key, default))

    def draw(self):
        """Render a "No data." placeholder and return the saved image."""
        (fig, canvas, w, h) = self.canvas()
        fig.text(.5, .5, "No data.", horizontalalignment='center',
                 fontproperties=self.font)
        return self.save(fig, canvas)

    def legend(self):
        """Render a "No legend." placeholder and return the saved image."""
        (fig, canvas, w, h) = self.canvas()
        fig.text(.5, .5, "No legend.", horizontalalignment='center',
                 fontproperties=self.font)
        return self.save(fig, canvas)

    def details(self):
        """Return extra chart details; empty in this base class."""
        return {}

    def canvas(self):
        """Create a figure, a backend canvas and a temporary output file.

        Returns:
            ``(fig, canvas, width, height)`` with width/height in pixels.

        Raises:
            ValueError: If ``imageType`` is not png, svg, pdf, ps or eps.
        """
        image_type = self.get("imageType", "png")
        # Validate before creating anything so a bad type leaks no temp file.
        # The original raised a bare string, which is itself a TypeError.
        if image_type not in ("png", "svg", "pdf", "ps", "eps"):
            raise ValueError("Invalid render target requested")

        fig = Figure()
        if image_type == "png":
            canvas = FigureCanvasAgg(fig)
        elif image_type == "svg":
            canvas = FigureCanvasSVG(fig)
        elif image_type == "pdf":
            canvas = FigureCanvasPdf(fig)
        else:  # "ps" or "eps"
            canvas = FigureCanvasPS(fig)
        (self.file, self.filename) = mkstemp(".%s" % image_type)

        # Set basic figure parameters
        dpi = float(self.get('dpi'))
        (w, h) = (float(self.get('width')), float(self.get('height')))
        (win, hin) = (w/dpi, h/dpi)
        fig.set_size_inches(win, hin)
        fig.set_dpi(dpi)
        fig.set_facecolor('white')
        return (fig, canvas, w, h)

    def save(self, fig, canvas):
        """Draw the canvas and return the rendered output.

        With a temporary filename set, the figure is written there, reopened
        in binary mode, unlinked, and the open file object is returned.
        Without one, the figure is printed to ``self.file`` and that value is
        returned.
        """
        canvas.draw()
        if not self.filename:
            canvas.print_figure(self.file, dpi=float(self.get('dpi')))
            return self.file

        canvas.print_figure(self.filename, dpi=float(self.get('dpi')))
        # Reopen by name instead of ``os.fdopen(self.file)``: the PS backend
        # writes over the file. Binary mode keeps image bytes intact, and
        # ``open`` replaces the Python-2-only ``file`` builtin.
        f = open(self.filename, "rb")
        os.remove(self.filename)
        os.close(self.file)
        self.file = None
        self.filename = None
        return f

    def prepare(self):
        """Create the canvas and a configured axes with titles and labels.

        Returns:
            ``(fig, canvas, ax)``.
        """
        # Create canvas and determine figure parameters
        (fig, canvas, w, h) = self.canvas()
        dpif = float(self.get('dpi')) / 72
        padding = float(self.get('padding')) * dpif / h
        textPadding = float(self.get('textPadding')) * dpif
        titleFontSize = float(self.get('titleFontSize'))
        subFontSize = float(self.get('subtitleFontSize'))
        show_title = not self.get("notitle", False)
        show_labels = not self.get("nolabels", False)

        # First line is the title; any further lines form the subtitle.
        title = self.get("title", "").split("\n")
        if show_title:
            hsub = (len(title)-1) * (subFontSize * dpif + 4) + textPadding
            htitle = hsub + titleFontSize * dpif + textPadding * 2
        else:
            hsub = 0
            htitle = 0
        if not show_labels:
            padding = 0

        # Configure axes
        if self.get('square'):
            minsize = 1 - 2*padding
            axrect = (.5 - minsize/2 * h/w, padding, h/w * minsize, minsize)
        else:
            axrect = (padding, padding, 1 - 1.25*padding, 1 - htitle/h - padding)
        ax = fig.add_axes(axrect)

        xlog = (str(self.get('xAxisScale', "lin")) == 'log')
        ylog = (str(self.get('yAxisScale', "lin")) == 'log')
        if xlog:
            ax.semilogx()
        if ylog:
            ax.semilogy()
        setp(ax.get_xticklabels(), fontproperties=self.font)
        setp(ax.get_yticklabels(), fontproperties=self.font)
        setp(ax.get_xticklines(), markeredgewidth=2.0, zorder=4.0)
        setp(ax.get_yticklines(), markeredgewidth=2.0)
        ax.grid(True, alpha=0.25, color='#000000', linewidth=0.1)

        # Set titles
        if show_title:
            ax.title = ax.text(.5, 1+(hsub+textPadding)/(axrect[-1]*h), title[0],
                               verticalalignment='bottom',
                               horizontalalignment='center',
                               transform=ax.transAxes,
                               clip_box=None,
                               fontproperties=self.tfont)
            ax._set_artist_props(ax.title)
            if len(title) > 1:
                ax.subtitle = ax.text(.5, 1+textPadding/(axrect[-1]*h),
                                      "\n".join(title[1:]),
                                      verticalalignment='bottom',
                                      horizontalalignment='center',
                                      transform=ax.transAxes,
                                      clip_box=None,
                                      fontproperties=self.sfont)
        if show_labels:
            ax.set_xlabel(self.get("xAxisTitle", ""), fontproperties=self.afont)
            ax.set_ylabel(self.get("yAxisTitle", ""), fontproperties=self.afont)
        return (fig, canvas, ax)

    def map(self):
        """Return the image map for the chart; empty in this base class."""
        return ''

    def make(self):
        """Render the chart and return ``(image, legend, map)``.

        The image and legend are the contents read from the objects returned
        by :meth:`draw` and :meth:`legend`.
        """
        img = self.draw().read()
        legend = self.legend().read()
        image_map = self.map()
        return img, legend, image_map