예제 #1
0
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, precision_recall_curve, roc_curve, auc
from imblearn.over_sampling import SMOTE, BorderlineSMOTE, SVMSMOTE, ADASYN

# Plot styling: use the "Microsoft JhengHei" font for sans-serif text and keep
# the ASCII minus sign on axes.  This setup used to appear twice, verbatim;
# the second copy only re-applied the same settings, so one copy is kept.
import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei']
plt.rcParams['axes.unicode_minus'] = False
from matplotlib.font_manager import FontProperties
import seaborn as sns

# NOTE(review): `fname` is normally a path to a font file, while
# 'Microsoft JhengHei' is a family name.  Left unchanged here; confirm whether
# `family='Microsoft JhengHei'` was intended before `myfont` is used to draw text.
myfont = FontProperties(fname='Microsoft JhengHei', size=14)
sns.set(font=myfont.get_family())
# Applied after sns.set() so the seaborn theme keeps the sans-serif override.
sns.set_style("darkgrid", {"font.sans-serif": ['Microsoft JhengHei']})

# generate 2 class dataset
X, y = make_classification(n_samples=1000,
예제 #2
0
from traits.api import HasTraits
from traitsui.api import Item, View
from traitsui.menu import OKButton, CancelButton

# Paths used by Quant Studio: the directory holding this module, its "Lib"
# sub-directory, and a "QuantStudioConfig" directory under the user's home.
__QS_MainPath__ = os.path.dirname(os.path.realpath(__file__))
__QS_LibPath__ = os.sep.join((__QS_MainPath__, "Lib"))
__QS_ConfigPath__ = os.sep.join((os.path.expanduser("~"), "QuantStudioConfig"))

# Choose the matplotlib sans-serif font per platform: "SimHei" on Windows and,
# on macOS, the Arial Unicode font when its file is installed.  Other
# platforms keep matplotlib's defaults.
from matplotlib.pylab import mpl
if platform.system() == "Windows":
    mpl.rcParams["font.sans-serif"] = ["SimHei"]
elif platform.system() == "Darwin" and os.path.isfile("/Library/Fonts/Arial Unicode.ttf"):
    from matplotlib.font_manager import FontProperties
    Font = FontProperties(fname="/Library/Fonts/Arial Unicode.ttf")
    mpl.rcParams["font.family"] = Font.get_family()
    mpl.rcParams["font.sans-serif"] = Font.get_name()
# Always draw the minus sign with the ASCII hyphen.
mpl.rcParams["axes.unicode_minus"] = False

# Quant Studio system error
class __QS_Error__(Exception):
    """Quant Studio error."""

# Quant Studio 系统对象
class __QS_Object__(HasTraits):
    """Quant Studio 系统对象"""
    def __init__(self, sys_args={}, config_file=None, **kwargs):
        self._QS_Logger = kwargs.pop("logger", None)
        if self._QS_Logger is None: self._QS_Logger = logging.getLogger()
        super().__init__(**kwargs)
예제 #3
0
class TrainAndEvaluate:
    """Drive one training / evaluation run configured by a hyperparameter dict.

    ``run`` opens a Weights & Biases run, ``make`` builds the data loaders,
    model, criterion, optimizer and learning-rate scheduler, ``train`` loops
    over the epochs (validating after each one) and ``evaluate`` scores the
    model on the validation or test loader.
    """

    def __init__(self, hyperparameters, seed=0, eval=False, **kwargs) -> None:
        """Store the configuration and set up fonts, device, reporter and RNG.

        Args:
            hyperparameters: Mapping of settings. Keys read by this class
                include "dataloader", "dataloader_params", "model",
                "criterion", "epochs", "starting_epoch", "exp_name",
                "clear_redis", "tiny_transforms", "viz_attention" and
                "number_of_figures".
            seed: Seed of the NumPy ``RandomState`` stored in ``self.rng``.
            eval: Stored as ``self.eval``; ``run`` only trains when it
                compares equal to ``False``.
            **kwargs: Accepted but not used.
        """
        print(hyperparameters)
        # Set up matplotlib fonts.
        # NOTE(review): the ".tff" extension looks like a typo for ".ttf";
        # confirm against the font file that actually ships with the project.
        self.prop = FontProperties(fname="NotoColorEmoji.tff")
        plt.rcParams['font.family'] = self.prop.get_family()
        self.eval = eval

        # Use the GPU when one is available.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Memory reporter, used once in run().
        self.reporter = MemReporter()

        # Seeded random state.
        self.rng = np.random.RandomState(seed=seed)

        self.hyperparameters = hyperparameters

    def run(self):
        """Execute the whole pipeline inside a wandb run and return the model.

        Builds everything with ``make``, trains unless ``self.eval`` is set,
        then evaluates on the test loader.
        """
        with wandb.init(project="bike-1b", config=self.hyperparameters,
                        name=self.hyperparameters["exp_name"], save_code=True):
            # Access all HPs through wandb.config, so logging matches execution.
            self.config = wandb.config

            # Make the model, data, and optimization problem.
            self.make()

            torch.cuda.empty_cache()
            self.reporter.report()
            if self.eval == False:
                self.train()

            # Test the final performance.
            print("testing:")
            self.evaluate(dataset='test')
            return self.model

    def make(self):
        """Create loaders, model, criterion, optimizer and LR scheduler.

        Reads ``self.config`` (set by ``run``) as well as
        ``self.hyperparameters``. When ``config.starting_epoch`` is positive,
        model and optimizer state are restored from
        ``<project_path>/models/model_<starting_epoch>.tar``.
        """
        loader_factory = self.hyperparameters["dataloader"]
        loader_params = self.hyperparameters["dataloader_params"]

        # Make the data.
        self.train_loader = loader_factory(data_set_type='train', **loader_params)
        self.test_loader = loader_factory(data_set_type='test', **loader_params)
        self.val_loader = loader_factory(data_set_type="val", **loader_params)
        # Four-pair validation loader whose single batch feeds am_viz().
        self.tiny_val_loader = loader_factory(
            root=loader_params["root"],
            data_set_type="val",
            data_set_size=4,
            normalize=True,
            balance=0.5,
            num_workers=20,
            data_splits={"val": 1.0},
            prefetch_factor=1,
            batch_size=4,
            transforms=self.hyperparameters["tiny_transforms"],
            shuffle=False)

        named_loaders = zip(["train", "val", "test"],
                            [self.train_loader, self.val_loader, self.test_loader])
        for name, loader in named_loaders:
            print(f"{name} loader stats:\t number of pairs: {len(loader.dataset)}\t")
            print(f"number of positive pairs: \t {loader.dataset.num_same_ad}")
            print(f"number of negative pairs: \t {loader.dataset.num_diff_ad}")
            print(f"number of Ads used: \t {len(loader.dataset.ad_to_img.keys())}")
            print("#"*5)

        print(f"Training set size: {len(self.train_loader.dataset)}")

        if self.hyperparameters["clear_redis"] == True:
            print("flushing redis. Expect a slower first epoch :(")
            self.train_loader.flush_redis()

        # File paths of the small batch of images used to visualise the
        # backbone layer outputs; the filename entries are flattened into
        # one list.
        tiny_dataset = self.tiny_val_loader.dataset
        self.tiny_filepaths = tiny_dataset.same_ad_filenames + tiny_dataset.diff_ad_filenames
        self.tiny_filepaths = list(chain.from_iterable(self.tiny_filepaths))
        tiny_image_as, tiny_image_bs, _ = next(iter(self.tiny_val_loader))

        # Flatten the batch of image pairs into a batch of single images,
        # interleaved as a0, b0, a1, b1, ...
        image_list = [torch.unsqueeze(x, 0)
                      for x in chain.from_iterable(zip(tiny_image_as, tiny_image_bs))]
        self.tiny_batch = torch.cat(image_list)

        # Make the model.
        self.model = self.hyperparameters["model"](**self.config)

        # Make the loss: first try passing every hyperparameter as a keyword
        # argument and fall back to the no-argument constructor.  A bare
        # ``except`` would also swallow KeyboardInterrupt / SystemExit.
        try:
            self.criterion = self.hyperparameters["criterion"](**self.hyperparameters)
        except Exception:
            self.criterion = self.hyperparameters["criterion"]()
        self.base_optimizer = Adam(self.model.parameters(), lr=self.config.lr,
                                   weight_decay=self.config.weight_decay)

        # Load weights and optimizer state if continuing.
        if self.config.starting_epoch > 0:
            path = self.config.project_path
            checkpoint = torch.load(join(path, "models", f"model_{self.config.starting_epoch}.tar"))
            self.model.load_state_dict(checkpoint["model_state_dict"])
            self.base_optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

        # The configured values win over whatever a checkpoint restored.
        for g in self.base_optimizer.param_groups:
            g['lr'] = self.config.lr
            g["weight_decay"] = self.config.weight_decay

        # Make weights half precision if told to.
        if self.config.half_precision:
            self.model.half()
            # Keep batch-norm layers in float for stability.
            for layer in self.model.modules():
                if isinstance(layer, nn.BatchNorm2d):
                    layer.float()

        self.model.to(self.device)
        self.move_base_optimizer_to_device()

        # NOTE: despite its name, ``self.optimizer`` is the LR scheduler that
        # wraps ``self.base_optimizer``.
        self.optimizer = CosineAnnealingLR(self.base_optimizer, last_epoch=-1,
                                           T_max=self.hyperparameters["epochs"],
                                           eta_min=0.00002)
        # Fast-forward the schedule to the starting epoch.
        for _ in range(self.hyperparameters["starting_epoch"]):
            self.optimizer.step()

    def train(self):
        """Train from ``config.starting_epoch`` to ``config.epochs``.

        Training metrics are tracked every 10 batches and the validation set
        is evaluated after every epoch.
        """
        wandb.watch(self.model, self.criterion, log="all", log_freq=10)

        example_seen = 0  # number of examples seen
        batch_seen = 0

        for epoch in range(self.config.starting_epoch, self.config.epochs):
            self.model.train()
            self.current_epoch = epoch
            with tqdm(total=len(self.train_loader), ncols=120) as pbar_train:
                for data in self.train_loader:
                    torch.cuda.empty_cache()
                    self.image_as = data[0].to(self.device)
                    self.image_bs = data[1].to(self.device)
                    labels = data[2].to(self.device)

                    loss, outputs = self.train_batch([self.image_as, self.image_bs, labels])
                    example_seen += data[0].shape[0]
                    batch_seen += 1
                    # Report metrics every 10 batches.
                    if batch_seen % 10 == 0:
                        self.model.track_metrics(outputs, epoch, step=example_seen,
                                                 criterion=self.criterion, loss=loss,
                                                 split="train")

                    pbar_train.update(1)
                    pbar_train.set_description(f" Epoch: {epoch} loss: {loss:.4f}")
            # Validate.
            torch.cuda.empty_cache()
            self.evaluate(dataset='val', epoch=epoch)

    def train_batch(self, data):
        """Run one optimisation step on ``data`` and return ``(loss, outputs)``.

        Args:
            data: ``[image_as, image_bs, labels]`` already on ``self.device``.

        Returns:
            The loss as a Python float and a two-item list
            ``[outputs, labels]`` in the layout used for the configured model
            class.

        Raises:
            Exception: If the configured model is neither ``BaselineModel_1b``
                nor ``BaselineModel_1a``.
        """
        loss, outputs, labels = self.model.train_batch(data, self.criterion, self.device, self.model)

        # Backward pass.
        self.base_optimizer.zero_grad()
        loss.backward()
        # NOTE(review): the scheduler is stepped (with an explicit epoch)
        # before the optimizer on every batch; order kept as in the original.
        self.optimizer.step(epoch=self.current_epoch)
        self.base_optimizer.step()

        model_cls = self.hyperparameters["model"]
        if model_cls == BaselineModel_1b:
            detached = [outputs[0].detach().cpu(), outputs[1].detach().cpu()]
            return loss.detach().item(), [detached, labels.detach().cpu()]
        elif model_cls == BaselineModel_1a:
            return loss.detach().item(), [outputs, labels.detach().cpu()]
        else:
            raise Exception("Splat")

    def evaluate(self, dataset="val", epoch=None):
        """Evaluate the model on the validation or test loader.

        For ``dataset == "val"`` the aggregated outputs are passed to
        ``model.track_metrics`` and a checkpoint ``model_<epoch>.tar`` is
        written under ``<project_path>/models``.  For ``dataset == "test"``
        the outputs are passed to ``model.track_extra_metrics``.

        Args:
            dataset: ``"val"`` selects the validation loader; any other value
                selects the test loader.
            epoch: Epoch number used for logging and the checkpoint name.
        """
        path = self.config.project_path

        # Put the model in evaluation mode; train() switches it back at the
        # start of the next epoch.
        self.model.eval()

        losses = []
        image_a_batches = []
        image_b_batches = []
        label_batches = []

        # Visualise attention maps of the model.
        if self.hyperparameters["viz_attention"]:
            self.model.am_viz(self.tiny_batch, self.tiny_filepaths)

        loader = self.val_loader if dataset == "val" else self.test_loader
        first_batch = True
        with torch.no_grad():
            for data in loader:
                torch.cuda.empty_cache()
                self.image_as = data[0].to(self.device)
                self.image_bs = data[1].to(self.device)
                labels = data[2].to(self.device)
                loss, _accuracy, outputs = self.model.evaluate_batch(
                    [self.image_as, self.image_bs, labels],
                    self.criterion, self.device, self.model)

                image_a_batches.append(outputs[0].cpu())
                # Bug fix: batches after the first used to append outputs[0]
                # to the image-B accumulator.
                image_b_batches.append(outputs[1].cpu())
                label_batches.append(data[2].cpu())

                # Only the first batch is visualised.
                if first_batch:
                    if self.hyperparameters["model"] == BaselineModel_1a:
                        self.model.visualize(
                            data,
                            outputs[0],
                            epoch,
                            number_of_figures=self.hyperparameters["number_of_figures"],
                            unNormalizer=UnNormalize(loader.means, loader.stds))
                    first_batch = False

                losses.append(loss)

        list_of_outputs = [
            [self._concat_batches(image_a_batches), self._concat_batches(image_b_batches)],
            self._concat_batches(label_batches),
        ]
        if dataset == "val":
            self.model.track_metrics(list_of_outputs, epoch, step=epoch,
                                     criterion=self.criterion, loss=np.mean(losses),
                                     split="val")

            # Save weights and optimizer state.
            actual_path = join(path, "models")
            os.makedirs(actual_path, exist_ok=True)
            torch.save({
                "epoch": epoch,
                "model_state_dict": self.model.state_dict(),
                "optimizer_state_dict": self.base_optimizer.state_dict()
            }, join(actual_path, f"model_{epoch}.tar"))

        if dataset == "test":
            self.model.track_extra_metrics(list_of_outputs, epoch, split="test")

    @staticmethod
    def _concat_batches(batches):
        """Join per-batch tensors along dim 0; ``None`` when there were none."""
        if not batches:
            return None
        if len(batches) == 1:
            return batches[0]
        return torch.cat(batches, 0)

    def move_base_optimizer_to_device(self):
        """Move every tensor held in the base optimizer's state to ``self.device``."""
        def _move(tensor):
            tensor.data = tensor.data.to(self.device)
            if tensor._grad is not None:
                tensor._grad.data = tensor._grad.data.to(self.device)

        for state in self.base_optimizer.state.values():
            # Not sure there are any global tensors in the state dict, but
            # handle them as well as the per-parameter dicts.
            if isinstance(state, torch.Tensor):
                _move(state)
            elif isinstance(state, dict):
                for value in state.values():
                    if isinstance(value, torch.Tensor):
                        _move(value)
예제 #4
0
#%%
import json
import re
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.dates as mdates
from wordcloud import WordCloud, STOPWORDS
from matplotlib.font_manager import FontProperties
# Global matplotlib defaults for the figures produced by this script.
plt.rc('font', family='DejaVu Sans', serif='Helvetica Neue')
plt.rc('text', usetex='false')
# Font properties pointing at the macOS "Apple Color Emoji" font file; the
# family they report becomes the default font family.
prop = FontProperties(fname='/System/Library/Fonts/Apple Color Emoji.ttc')
plt.rcParams['font.family'] = prop.get_family()
plt.rcParams.update({'font.size': 10, 'figure.figsize': [15, 10]})


def fixHex(m):
    r"""Decode the text matched by ``m`` as UTF-8.

    The matched text is expected to be a run of literal ``\xNN`` escapes; the
    ``\x`` markers are dropped, the remaining hex digits are turned into
    bytes, and those bytes are decoded as UTF-8.

    Args:
        m: A regular-expression match object over a ``str``.

    Returns:
        The decoded string.
    """
    matched_text = m.group(0)
    hex_digits = "".join(matched_text.split("\\x"))
    return bytes.fromhex(hex_digits).decode('utf-8')


def fixHexStr(s):
    r"""Escape ``s`` and decode the ``\xf0...`` byte runs it contains.

    ``s`` is first encoded with the ``unicode-escape`` codec. Every run in
    the result that starts with a literal ``\xf0`` and is followed by one or
    more ``\xNN`` escapes is then replaced by what ``fixHex`` returns for it.

    Args:
        s: The string to repair.

    Returns:
        The escaped string with the matching runs replaced.
    """
    escaped = s.encode('unicode-escape').decode('utf-8')

    def _decode(match):
        return fixHex(match)

    return re.sub(r'\\x(f0)(?:\\x(..))+', _decode, escaped)


def messagesPer(messages, freq):
    s = pd.DataFrame(messages)
    s['timestamp'] = s.apply(
예제 #5
0
class Chart:
  """Base chart that renders a matplotlib figure to a temporary image file.

  This base implementation draws placeholder text ("No data." /
  "No legend."). Rendering options live in ``self.props`` and are read
  through ``get``.
  """

  def __init__(self, props, subdefaults):
    """Merge the property sources and build the font objects.

    Args:
      props: Mapping of properties; these override everything else.
      subdefaults: Mapping of defaults that override the built-in defaults
        below but are themselves overridden by ``props``.
    """
    self.file = None      # OS-level descriptor returned by mkstemp()
    self.filename = None  # path of the temporary output file
    self.props = {
      "imageType": "png",                   # Output image type.
      "dpi": 72,                            # Image DPI resolution.
      "width": 800,                         # Output image width in pixels (vs. DPI).
      "height": 600,                        # Output image height in pixels (vs. DPI).
      "padding": 50,                        # Padding around the figure edge, in pixels.
      "textPadding": 3,                     # Padding around text, in pixels.
      "fontFamily": "sans-serif",           # Font family.
      "fontName": "Luxi Sans",              # Font name.
      "fontSize": 12,                       # Size of non-title text, in pixels.
      "titleFontName": "Bitstream Vera Sans",
      "titleFontSize": 18,                  # Size of title text, in pixels.
      "subtitleFontSize": 14,               # Size of subtitle text, in pixels.
      "axtitleFontSize": 16,                # Size of axis title text, in pixels.
      "xAxisScale": "lin",                  # X axis scale, lin or log.
      "yAxisScale": "lin",                  # Y axis scale, lin or log.
      "square": False,                      # Force square layout.
      "integral": False,                    # Force integral display on legend.
      "notitle": False,                     # Suppress title.
      "nolabels": False,                    # Suppress axis labels.
      "sort": False,                        # Sort items and legend entries by value.
      "max_legend": 100                     # Maximum items in the legend.
    }
    # items() exists on Python 2 and 3; iteritems() is Python 2 only.
    for (k, v) in subdefaults.items():
      self.props[k] = v
    for (k, v) in props.items():
      self.props[k] = v

    # Body text font.
    self.font = FontProperties()
    self.font.set_family(self.get('fontFamily'))
    self.font.set_name(self.get('fontName'))
    self.font.set_size(float(self.get('fontSize')))
    # Title font: falls back to the body font settings, always bold.
    self.tfont = FontProperties()
    self.tfont.set_family(self.get('titleFontFamily', self.font.get_family()[-1]))
    self.tfont.set_name(self.get('titleFontName', self.font.get_name()))
    self.tfont.set_size(self.get('titleFontSize', self.font.get_size()))
    self.tfont.set_weight('bold')
    # Subtitle font: falls back to the title font settings.
    self.sfont = FontProperties()
    self.sfont.set_family(self.get('subtitleFontFamily', self.tfont.get_family()[-1]))
    self.sfont.set_name(self.get('subtitleFontName', self.tfont.get_name()))
    self.sfont.set_size(self.get('subtitleFontSize', self.tfont.get_size()))
    # Axis title font: falls back to the title font settings, always bold.
    self.afont = FontProperties()
    self.afont.set_family(self.get('axtitleFontFamily', self.tfont.get_family()[-1]))
    self.afont.set_name(self.get('axtitleFontName', self.tfont.get_name()))
    self.afont.set_size(self.get('axtitleFontSize', self.tfont.get_size()))
    self.afont.set_weight('bold')

  def __del__(self):
    """Close the temporary file descriptor if one is still open."""
    # getattr guards against instances whose __init__ never ran.
    fd = getattr(self, "file", None)
    if fd is not None:
      os.close(fd)

  def get(self, key, default=None):
    """Return attribute ``key`` if set, else ``props[key]``, else ``default``."""
    return getattr(self, key, self.props.get(key, default))

  def draw(self):
    """Render the placeholder "No data." image and return the saved file."""
    (fig, canvas, w, h) = self.canvas()
    fig.text(.5, .5, "No data.", horizontalalignment='center', fontproperties=self.font)
    return self.save(fig, canvas)

  def legend(self):
    """Render the placeholder "No legend." image and return the saved file."""
    (fig, canvas, w, h) = self.canvas()
    fig.text(.5, .5, "No legend.", horizontalalignment='center', fontproperties=self.font)
    return self.save(fig, canvas)

  def details(self):
    """Return an empty dict."""
    return {}

  def canvas(self):
    """Create the figure, a backend canvas and a temporary output file.

    Returns:
      ``(fig, canvas, width, height)`` with width and height in pixels.

    Raises:
      ValueError: If "imageType" is not png, svg, pdf, ps or eps.
    """
    image_type = self.get("imageType", "png")
    # Validate first so nothing is created for an unsupported type.  The
    # original raised a bare string, which is not a valid exception.
    if image_type not in ("png", "svg", "pdf", "ps", "eps"):
      raise ValueError("Invalid render target requested")

    fig = Figure()
    if image_type == "png":
      canvas = FigureCanvasAgg(fig)
    elif image_type == "svg":
      canvas = FigureCanvasSVG(fig)
    elif image_type == "pdf":
      canvas = FigureCanvasPdf(fig)
    else:  # "ps" or "eps"
      canvas = FigureCanvasPS(fig)
    (self.file, self.filename) = mkstemp(".%s" % image_type)

    # Set basic figure parameters.
    dpi = float(self.get('dpi'))
    (w, h) = (float(self.get('width')), float(self.get('height')))
    (win, hin) = (w/dpi, h/dpi)
    fig.set_size_inches(win, hin)
    fig.set_dpi(dpi)
    fig.set_facecolor('white')
    return (fig, canvas, w, h)

  def save(self, fig, canvas):
    """Render ``canvas`` and return the result.

    When a temporary filename is set, the figure is written to it, the file
    is reopened for reading, and the temporary path and descriptor are then
    released; the open file object is returned.  Otherwise the figure is
    written to ``self.file``, which is returned.
    """
    canvas.draw()
    if not self.filename:
      canvas.print_figure(self.file, dpi=float(self.get('dpi')))
      return self.file
    else:
      canvas.print_figure(self.filename, dpi=float(self.get('dpi')))
      # PS backend writes over the file, so reopen it by name.  Image data
      # is binary, hence "rb"; the Python 2-only file() builtin is replaced
      # by open().
      f = open(self.filename, "rb")
      os.remove(self.filename)
      os.close(self.file)
      self.file = None
      self.filename = None
      return f

  def prepare(self):
    """Create the figure with configured axes, titles and axis labels.

    Returns:
      ``(fig, canvas, ax)``.
    """
    # Create canvas and determine figure parameters.
    (fig, canvas, w, h) = self.canvas()
    dpif = float(self.get('dpi')) / 72
    padding = float(self.get('padding')) * dpif / h
    textPadding = float(self.get('textPadding')) * dpif
    titleFontSize = float(self.get('titleFontSize'))
    axFontSize = float(self.get('axtitleFontSize'))
    subFontSize = float(self.get('subtitleFontSize'))
    # First line is the title, any further lines form the subtitle.
    title = self.get("title", "").split("\n")
    if not self.get("notitle", False):
      hsub = (len(title)-1) * (subFontSize * dpif + 4) + textPadding
      htitle = hsub + titleFontSize * dpif + textPadding * 2
    else:
      hsub = 0
      htitle = 0
    if self.get("nolabels", False):
      padding = 0

    # Configure axes.
    if self.get('square'):
      minsize = 1 - 2*padding
      axrect = (.5 - minsize/2 * h/w, padding, h/w * minsize, minsize)
    else:
      axrect = (padding, padding, 1 - 1.25*padding, 1 - htitle/h - padding)
    ax = fig.add_axes(axrect)

    xlog = (str(self.get('xAxisScale', "lin")) == 'log')
    ylog = (str(self.get('yAxisScale', "lin")) == 'log')
    if xlog:
      ax.semilogx()
    if ylog:
      ax.semilogy()

    setp(ax.get_xticklabels(), fontproperties=self.font)
    setp(ax.get_yticklabels(), fontproperties=self.font)
    setp(ax.get_xticklines(), markeredgewidth=2.0, zorder=4.0)
    setp(ax.get_yticklines(), markeredgewidth=2.0)
    ax.grid(True, alpha=0.25, color='#000000', linewidth=0.1)

    # Set titles.
    if not self.get("notitle", False):
      ax.title = ax.text(.5, 1+(hsub+textPadding)/(axrect[-1]*h), title[0],
                      verticalalignment='bottom',
                      horizontalalignment='center',
                      transform=ax.transAxes,
                      clip_box=None,
                      fontproperties=self.tfont)
      ax._set_artist_props(ax.title)
      if len(title) > 1:
        ax.subtitle = ax.text(.5, 1+textPadding/(axrect[-1]*h), "\n".join(title[1:]),
                           verticalalignment='bottom',
                           horizontalalignment='center',
                           transform=ax.transAxes,
                           clip_box=None,
                           fontproperties=self.sfont)

    if not self.get("nolabels", False):
      ax.set_xlabel(self.get("xAxisTitle", ""), fontproperties=self.afont)
      ax.set_ylabel(self.get("yAxisTitle", ""), fontproperties=self.afont)
    return (fig, canvas, ax)

  def map(self):
    """Return an empty string."""
    return ''

  def make(self):
    """Return ``(image data, legend data, map)`` for this chart."""
    img = self.draw().read()
    legend = self.legend().read()
    image_map = self.map()
    return img, legend, image_map