Example #1
    def normal_schedule(self):
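        """Write the baseline schedule for this zone to the thermostat.

        Picks the scheduled setpoints for the current time, widens them during a
        DR event, clamps them to the safety temperatures, and writes the result.
        """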
        def in_between(now, start, end):
            if start < end:
                return start <= now < end
            elif end < start:
                return start <= now or now < end
            else:
                return True

        def getDatetime(date_string):
            """Gets datetime from string with format HH:MM. Should be changed to datetime in-built function. """
            return datetime.time(int(date_string.split(":")[0]),
                                 int(date_string.split(":")[1]))

        setpoints_array = self.advise_cfg["Advise"]["Baseline"][
            self.now.weekday()]

        # Find the setpoints scheduled for the current time; fall back to the
        # safety temperatures below if no interval matches.
        SetpointLow = None
        SetpointHigh = None
        for j in setpoints_array:
            if in_between(
                    self.now.time(),
                    datetime.time(int(j[0].split(":")[0]),
                                  int(j[0].split(":")[1])),
                    datetime.time(int(j[1].split(":")[0]),
                                  int(j[1].split(":")[1]))):
                SetpointLow = j[2]
                SetpointHigh = j[3]
                break

        dataManager = DataManager(self.cfg,
                                  self.advise_cfg,
                                  None,
                                  now=self.now,
                                  zone=self.zone)
        Safety_temps = dataManager.safety_constraints()

        if not isinstance(SetpointLow, (int, float, long)):
            SetpointLow = Safety_temps[0][0]
        if not isinstance(SetpointHigh, (int, float, long)):
            SetpointHigh = Safety_temps[0][1]

        if (self.cfg["Pricing"]["DR"] and in_between(self.now.time(), getDatetime(self.cfg["Pricing"]["DR_Start"]),
                                                     getDatetime(self.cfg["Pricing"]["DR_Finish"]))) \
                or self.now.weekday() == 4:  # TODO: remove always having DR on Friday once DR subscribe is implemented
            SetpointHigh += self.advise_cfg["Advise"][
                "Baseline_Dr_Extend_Percent"]
            SetpointLow -= self.advise_cfg["Advise"][
                "Baseline_Dr_Extend_Percent"]

        # Make sure the difference between SetpointHigh and SetpointLow is at least the minimum comfortband height.
        if SetpointHigh - SetpointLow < self.advise_cfg["Advise"][
                "Minimum_Comfortband_Height"]:
            raise Exception(
                "Warning, the difference between SetpointHigh and SetpointLow is too narrow. Difference: %s. Check the config file schedule."
                % str(SetpointHigh - SetpointLow))

        # Make sure we are not exceeding the safety temps.
        # Only violate the comfortband height if the safety temperatures force it.
        if SetpointLow < Safety_temps[0][0]:
            diff = Safety_temps[0][0] - SetpointLow
            SetpointLow = Safety_temps[0][0]
            SetpointHigh = min(Safety_temps[0][1], SetpointHigh + diff)

        elif SetpointHigh > Safety_temps[0][1]:
            diff = SetpointHigh - Safety_temps[0][1]
            SetpointHigh = Safety_temps[0][1]
            SetpointLow = max(Safety_temps[0][0], SetpointLow - diff)

        p = {
            "override": True,
            "heating_setpoint": SetpointLow,
            "cooling_setpoint": SetpointHigh,
            "mode": 3
        }

        for i in range(self.advise_cfg["Advise"]["Thermostat_Write_Tries"]):
            try:
                self.tstat.write(p)
                print("For zone: %s writing Baseline: %s" %
                      (self.zone, str(p)))
                break
            except:
                if i == self.advise_cfg["Advise"]["Thermostat_Write_Tries"] - 1:
                    e = sys.exc_info()[0]
                    print e
                    return False, p
                continue
        return True, p
Example #2
tf.flags.DEFINE_boolean("allow_soft_placement", True,
                        "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False,
                        "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Data Preparation
# ==================================================

dataManager = DataManager()

# Load data
print("Loading training data...")
x_text, y, _ = dataManager.load_training_data()
print("Finish loading data")

x = []
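# Pad each example with zero word-vectors to a fixed length of 100,
# splitting the padding between the front and the back.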
for data in x_text:
    a = 100 - len(data)
    if a > 0:
        front = a / 2
        back = a - front
        front_vec = [
            np.zeros(dataManager.wordvector_dim + 2) for j in range(front)
        ]
Example #3
conn = psycopg2.connect(
    database=url.path[1:],
    user=url.username,
    password=url.password,
    host=url.hostname,
    port=url.port
)
"""

##########################################
# Init bot.
##########################################

from DataManager import DataManager
data_manager = DataManager(conn)

from CianCianBot import CianCianBot
bot = CianCianBot(data_manager)

##########################################
# Init flask backend and linebot facility.
##########################################

from flask import Flask, request, abort

from linebot import (LineBotApi, WebhookHandler)
from linebot.exceptions import (InvalidSignatureError)
from linebot.models import (
    MessageEvent,
    TextMessage,
Example #4
 def __init__(self, shop_name=None, *args, **kwargs):
     super(QuotesSpider, self).__init__(*args, **kwargs)
     self.database = DataManager(shop_name)
     self.shop_name = shop_name.lower()
     self.start_urls = self.database.getScrapyUrl()
     self.page_index = 1
Example #5
from PIL import Image
#torch
import torch
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms, models
from torch import nn

#parameters Loading
from AppParametersLoader import AppParametersLoader
parameters = AppParametersLoader()
parameters.print_all()

#Data Loading
from DataManager import DataManager
data_manager = DataManager()
data_manager.load_TrainTestValid(parameters.data_dir())

#model definition
from ModelManager import ModelManager
if parameters.arch() == 'vgg16':
    model = models.vgg16(pretrained=True)
    input_nodes = 25088
elif parameters.arch() == 'densenet121':
    model = models.densenet121(pretrained=True)
    input_nodes = 1024
else:
    raise ValueError("Unsupported architecture: %s" % parameters.arch())

classifier = nn.Sequential(
    nn.Linear(input_nodes, parameters.hidden_units()), nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(parameters.hidden_units(),
Example #6
               action="store_true",
               help="Plot % of active cases of population")
args = p.parse_args()

if args.all:
    args.active = True
    args.recovered = True
    args.deaths = True
    args.population_percent = True

logger = Logger("log", autosave=True)

if not args.summary and not args.summary_only and not (
        args.active or args.recovered or args.deaths
        or args.population_percent):
    logger.warning(
        "No output specified (active/recovered etc.). Use the -h option to get more information."
    )
    exit(0)

manager = DataManager(logger, args.countries, True)

if args.summary_only:
    manager.load_summary()
    print_summary()
    exit(0)
elif args.summary:
    manager.load_summary()
    print_summary()

present_history(args.countries)
Example #7
def train():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    TAG_EMBEDDING_DIM = 64
    VAL_EMBEDDING_DIM = 128
    HIDDEN_DIM = 1500
    NUM_EPOCHS = 2
    LAYER_NUM = 1
    BATCH_SIZE = 256

    data_manager_train = DataManager(TRAIN)
    data_manager_eval = DataManager(TEST)
    warnings.filterwarnings("ignore")

    tag_to_idx, idx_to_tag = data_manager_train.get_tag_dicts()
    val_to_idx, idx_to_val = data_manager_train.get_val_dicts()

    validate_split_idx = int(len(data_manager_eval.get_data()) * 0.04)  # 2000 for eval

    data_train = torch.Tensor([(
        tag_to_idx.get((tag, have_children, have_sibling), tag_to_idx["UNK"]),
        val_to_idx.get(val, val_to_idx["UNK"]),
    ) for tag, val, have_children, have_sibling in (
        data_manager_train.get_data())])

    data_eval = torch.Tensor([(
        tag_to_idx.get((tag, have_children, have_sibling), tag_to_idx["UNK"]),
        val_to_idx.get(val, val_to_idx["UNK"]),
    ) for tag, val, have_children, have_sibling in (
        data_manager_eval.get_data()[:validate_split_idx])])

    train_data_loader = torch.utils.data.DataLoader(Dataset(data_train),
                                                    BATCH_SIZE,
                                                    shuffle=True,
                                                    drop_last=True,
                                                    num_workers=8)

    eval_data_loader = torch.utils.data.DataLoader(Dataset(data_eval),
                                                   BATCH_SIZE,
                                                   shuffle=False,
                                                   drop_last=True,
                                                   num_workers=8)

    model_tag = nn.DataParallel(
        AtentionModel(len(tag_to_idx), len(val_to_idx), TAG_EMBEDDING_DIM,
                      VAL_EMBEDDING_DIM, HIDDEN_DIM, LAYER_NUM, False))

    model_val = nn.DataParallel(
        AtentionModel(len(tag_to_idx), len(val_to_idx), TAG_EMBEDDING_DIM,
                      VAL_EMBEDDING_DIM, HIDDEN_DIM, LAYER_NUM, True))

    #model = torch.load(f"D://data//model_attention_1.pickle")
    loss_function = nn.NLLLoss()
    optimizer_tag = optim.Adam(model_tag.parameters())
    optimizer_val = optim.Adam(model_val.parameters())

    # -----------putting models on GPU-------------
    model_tag.cuda()
    model_val.cuda()
    # ---------------------------------------------

    model_iter = 1

    # Used for TensorBoard logging
    summary_writer = SummaryWriter()

    for epoch in range(NUM_EPOCHS):

        model_tag.train()
        model_val.train()

        for i, (sentence, y) in tqdm(
                enumerate(train_data_loader),
                total=len(train_data_loader),
                desc=f"Epoch: {epoch}",
                unit="batches",
        ):
            global_step = epoch * len(train_data_loader) + i
            size = int(sentence.size(0))

            model_tag.zero_grad()
            model_val.zero_grad()
            model_tag.train()
            model_val.train()

            unk_idx = val_to_idx["UNK"]
            mask_unk = y[:,
                         1] != unk_idx  # mask for all y val that are not UNK

            sentence_tag = sentence.to(device)
            y_pred_tag = model_tag(sentence_tag)
            y = y.to(device)

            correct_tag = (y_pred_tag.argmax(dim=1) == y[:, 0]).sum().item()
            loss_tag = loss_function(y_pred_tag, y[:, 0].long())

            summary_writer.add_scalar("model_tag: train loss", loss_tag,
                                      global_step)
            summary_writer.add_scalar("model_tag: accuracy",
                                      100 * (correct_tag / size), global_step)

            loss_tag.backward()
            nn.utils.clip_grad_value_(model_tag.parameters(), 5.0)
            optimizer_tag.step()

            loss_val = 0
            correct_val = 0
            if mask_unk.sum() > 0:
                # do forward for val_model
                sentence_val = sentence[mask_unk, :, :].to(device)
                y_pred_val = model_val(sentence_val)
                y = y.to(device)

                correct_val = (y_pred_val.argmax(dim=1) == y[mask_unk, 1]).sum().item()
                loss_val = loss_function(y_pred_val, y[mask_unk, 1].long())

                summary_writer.add_scalar("model_value: train loss", loss_val,
                                          global_step)
                summary_writer.add_scalar("model_value: train accuracy",
                                          100 * (correct_val / size),
                                          global_step)

                loss_val.backward()
                nn.utils.clip_grad_value_(model_val.parameters(), 5.0)
                optimizer_val.step()

            if (i + 1) % 200 == 0:
                tag = f"TRAIN tag accuracy: {100 * (correct_tag / size)}, tag loss: {loss_tag}, "
                val = f"val accuracy: {100 * (correct_val / size)}, val loss: {loss_val}\n"

                with open(f'{DATA_ROOT}log.txt', 'a') as log:
                    log.write(tag)
                    log.write(val)

            TIME_FOR_EVAL = 2500
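            # Every TIME_FOR_EVAL batches: checkpoint both models and run a full evaluation pass.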
            if (i + 1) % TIME_FOR_EVAL == 0:
                #evaluation
                torch.save(
                    model_tag,
                    f"D://data//models//tag//budala_{model_iter}.pickle")
                torch.save(
                    model_val,
                    f"D://data//models//val//budala_{model_iter}.pickle")
                model_iter += 1

                model_tag.eval()
                model_val.eval()

                correct_sum_tag = 0
                correct_sum_val = 0
                loss_sum_tag = 0
                loss_sum_val = 0
                size_sum_eval = 0

                with torch.no_grad():

                    for i_eval, (sentence_eval, y_eval) in tqdm(
                            enumerate(eval_data_loader),
                            total=len(eval_data_loader),
                            desc=f"Epoch eval: {global_step//TIME_FOR_EVAL}",
                            unit="batches",
                    ):
                        global_step_eval = (global_step // TIME_FOR_EVAL
                                            ) * len(eval_data_loader) + i_eval
                        size_eval = int(sentence_eval.size(0))
                        size_sum_eval += size_eval
                        sentence_eval = sentence_eval.to(device)

                        unk_idx = val_to_idx["UNK"]
                        mask_unk = y_eval[:, 1] != unk_idx

                        #tag
                        sentence_tag = sentence_eval.to(device)
                        y_pred_tag = model_tag(sentence_tag)
                        y_eval = y_eval.to(device)

                        correct_tag = (y_pred_tag.argmax(dim=1) == y_eval[:, 0]).sum().item()
                        loss_tag = loss_function(y_pred_tag, y_eval[:, 0].long())

                        correct_sum_tag += correct_tag
                        loss_sum_tag += loss_tag

                        summary_writer.add_scalar("model_tag: evaluation loss",
                                                  loss_tag, global_step_eval)
                        summary_writer.add_scalar(
                            "model_tag: evaluation accuracy",
                            100 * (correct_tag / size_eval), global_step_eval)

                        if mask_unk.sum() > 0:
                            sentence_eval = sentence_eval[mask_unk].to(device)
                            y_pred_val = model_val(sentence_eval)
                            y_eval = y_eval.to(device)

                            correct_val = (y_pred_val.argmax(dim=1) == y_eval[mask_unk, 1]).sum().item()
                            loss_val = loss_function(
                                y_pred_val, y_eval[mask_unk, 1].long())

                            correct_sum_val += correct_val
                            loss_sum_val += loss_val

                            summary_writer.add_scalar(
                                "model_value: evaluation loss", loss_val,
                                global_step_eval)
                            summary_writer.add_scalar(
                                "model_value: evaluation accuracy",
                                100 * (correct_val / size_eval),
                                global_step_eval)

                    summary_writer.add_scalar(
                        "model_tag: average evaluation loss",
                        loss_sum_tag / len(eval_data_loader),
                        global_step // TIME_FOR_EVAL)
                    summary_writer.add_scalar(
                        "model_tag: average evaluation accuracy",
                        100 * (correct_sum_tag / size_sum_eval),
                        global_step // TIME_FOR_EVAL)

                    summary_writer.add_scalar(
                        "model_value: average evaluation loss",
                        loss_sum_val / len(eval_data_loader),
                        global_step // TIME_FOR_EVAL)
                    summary_writer.add_scalar(
                        "model_value: average evaluation accuracy",
                        100 * (correct_sum_val / size_sum_eval),
                        global_step // TIME_FOR_EVAL)

                    tag = f"EVAL: tag accuracy: {100 * (correct_sum_tag / size_sum_eval)}, tag loss: {loss_sum_tag/len(eval_data_loader)}, "
                    val = f"val accuracy: {100 * (correct_sum_val / size_sum_eval)}, val loss: {loss_sum_val/len(eval_data_loader)}\n"

                    with open(f'{DATA_ROOT}log.txt', 'a') as log:
                        log.write(tag)
                        log.write(val)
Example #8
        )

    # Runs the shortest-path algorithm and returns the action produced by the MPC.
    def advise(self):
        self.advise_unit.shortest_path(self.root)
        path = self.advise_unit.reconstruct_path()
        action = self.advise_unit.g[path[0]][path[1]]['action']

        if self.plot:
            fig = plotly_figure(self.advise_unit.g, path=path)
            py.plot(fig)

        return action


if __name__ == '__main__':
    from DataManager import DataManager
    with open("config_south.yml", 'r') as ymlfile:
        cfg = yaml.load(ymlfile)

    dm = DataManager(cfg)

    adv = Advise(
        datetime.datetime.utcnow().replace(
            tzinfo=pytz.timezone("UTC")).astimezone(
                tz=pytz.timezone("America/Los_Angeles")), dm.preprocess_occ(),
        dm.preprocess_therm(), dm.weather_fetch(), "winter_rates", 0.99995, 15,
        1, True, 87, 55, 0.075, 1.25, 400, 400.)

    print adv.advise()
Example #9
# 452 Assignment 2
# Written by: Connor Moore
# Student # : 20011955
# Date: Feb 26, 2019

from Glass_BPNV4 import BPNetwork
from DataManager import DataManager
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
# NNet = BPNetwork(9, 8, 6)

fileWrite = False
data = DataManager()
test = BPNetwork(9, 8, 7)
# initial weights
init_weights = test.weights_ih


# convert back to integer
def convert_decode(arr):
    if arr == [1, 0, 0, 0, 0, 0, 0]:
        return 1
    if arr == [0, 1, 0, 0, 0, 0, 0]:
        return 2
    if arr == [0, 0, 1, 0, 0, 0, 0]:
        return 3
    if arr == [0, 0, 0, 1, 0, 0, 0]:
        return 4
    if arr == [0, 0, 0, 0, 1, 0, 0]:
        return 5
    if arr == [0, 0, 0, 0, 0, 1, 0]:
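
For reference, the one-hot decoding above can also be written with numpy's argmax; a minimal sketch, assuming arr is a plain 0/1 list of length 7 as in the literals above:

# Equivalent decoder: the position of the 1 entry, plus one, gives the class id.
def convert_decode_argmax(arr):
    return int(np.argmax(arr)) + 1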
Example #10
def mainTF(options):

  import tensorflow as tf
  from CreateModel import CreateModel
  from DataManager import DataManager
  from DataSet import DataSet

  print "PROCESSING VALIDATION DATA"

  dgSig = DataGetter.DefinedVariables(options.netOp.vNames, signal = True, background = False)
  dgBg = DataGetter.DefinedVariables(options.netOp.vNames,  signal = False, background = True)

  validDataSig = [((dataPath + "/trainingTuple_0_division_1_rpv_stop_850_validation_0.h5", ), 2),]

  validDataSig2 = [((dataPath + "/trainingTuple_0_division_1_stealth_stop_350_SHuHd_validation_0.h5", ), 2),]

  validDataSig3 = [((dataPath + "/trainingTuple_0_division_1_rpv_stop_350_validation_0.h5", ), 2),]

  validDataBgTTbar = [((dataPath + "/trainingTuple_20_division_1_TT_validation_0.h5", ), 1),
                      ((dataPath + "/trainingTuple_2110_division_1_TT_validation_0.h5", ), 1),]
  
  print "Input Variables: ",len(dgSig.getList())

  # Import data
  #print options.runOp.validationSamples
  
  validDataSig =       getValidData(dgSig, validDataSig,      options)
  validDataSig2 =      getValidData(dgSig, validDataSig2,     options)
  validDataSig3 =      getValidData(dgSig, validDataSig3,     options)
  validDataBgTTbar =   getValidData(dgBg,  validDataBgTTbar,  options)

  validDataTTbar = combineValidationData(validDataSig, validDataBgTTbar)
  validDataQCDMC = combineValidationData(validDataSig2, validDataBgTTbar)
  validDataQCDData = combineValidationData(validDataSig3, validDataBgTTbar)

  #get input/output sizes
  #print validData["data"].shape
  nFeatures = validDataTTbar["data"].shape[1]
  nLabels = validDataTTbar["labels"].shape[1]
  nWeights = validDataTTbar["weights"].shape[1]
  nDomain = validDataSig["domain"].shape[1]

  #Training parameters
  l2Reg = options.runOp.l2Reg
  MiniBatchSize = options.runOp.minibatchSize
  nEpoch = options.runOp.nepoch
  ReportInterval = options.runOp.reportInterval
  validationCount = min(options.runOp.nValidationEvents, validDataTTbar["data"].shape[0])

  #scale data inputs to mean 0, stddev 1
  categories = numpy.array(options.netOp.vCategories)
  mins = numpy.zeros(categories.shape, dtype=numpy.float32)
  ptps = numpy.zeros(categories.shape, dtype=numpy.float32)
  for i in xrange(categories.max()):
    selectedCategory = categories == i
    mins[selectedCategory] = validDataTTbar["data"][:,selectedCategory].mean()
    ptps[selectedCategory] = validDataTTbar["data"][:,selectedCategory].std()
  ptps[ptps < 1e-10] = 1.0

  ##Create data manager, this class controls how data is fed to the network for training
  #                 DataSet(fileGlob, xsec, Nevts, kFactor, sig, prescale, rescale)
  signalDataSets = [
                    #DataSet(dataPath + "/trainingTuple_*_division_*_rpv_stop_350_training_0.h5",      365.4,  61878989, 1.0, True,  0, 1.0, 1.0, 1),
                    #DataSet(dataPath + "/trainingTuple_*_division_*_rpv_stop_450_training_0.h5",      365.4,  61878989, 1.0, True,  0, 1.0, 1.0, 1),
                    DataSet(dataPath + "/trainingTuple_*_division_*_rpv_stop_550_training_0.h5",      365.4,  61878989, 1.0, True,  0, 1.0, 1.0, 1),
                    DataSet(dataPath + "/trainingTuple_*_division_*_rpv_stop_650_training_0.h5",      365.4,  61878989, 1.0, True,  0, 1.0, 1.0, 1),
                    DataSet(dataPath + "/trainingTuple_*_division_*_rpv_stop_750_training_0.h5",      365.4,  61878989, 1.0, True,  0, 1.0, 1.0, 1),
                    DataSet(dataPath + "/trainingTuple_*_division_*_rpv_stop_850_training_0.h5",      365.4,  61878989, 1.0, True,  0, 1.0, 1.0, 1),

                    #DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_350_SHuHd_training_0.h5",      365.4,  61878989, 1.0, True,  0, 1.0, 1.0, 1),
                    #DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_450_SHuHd_training_0.h5",      365.4,  61878989, 1.0, True,  0, 1.0, 1.0, 1),
                    DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_550_SHuHd_training_0.h5",      365.4,  61878989, 1.0, True,  0, 1.0, 1.0, 1),
                    DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_650_SHuHd_training_0.h5",      365.4,  61878989, 1.0, True,  0, 1.0, 1.0, 1),
                    DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_750_SHuHd_training_0.h5",      365.4,  61878989, 1.0, True,  0, 1.0, 1.0, 1),
                    DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_850_SHuHd_training_0.h5",      365.4,  61878989, 1.0, True,  0, 1.0, 1.0, 1),

                    #DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_350_SYY_training_0.h5",      365.4,  61878989, 1.0, True,  0, 1.0, 1.0, 1),
                    #DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_450_SYY_training_0.h5",      365.4,  61878989, 1.0, True,  0, 1.0, 1.0, 1),
                    DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_550_SYY_training_0.h5",      365.4,  61878989, 1.0, True,  0, 1.0, 1.0, 1),
                    DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_650_SYY_training_0.h5",      365.4,  61878989, 1.0, True,  0, 1.0, 1.0, 1),
                    DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_750_SYY_training_0.h5",      365.4,  61878989, 1.0, True,  0, 1.0, 1.0, 1),
                    DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_850_SYY_training_0.h5",      365.4,  61878989, 1.0, True,  0, 1.0, 1.0, 1),

                    #DataSet(dataPath + "/trainingTuple_*_division_*_rpv_stop_*_training_0.h5",      365.4,  61878989, 1.0, True,  0, 1.0, 1.0, 1),
                    #DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_*_SHuHd_training_0.h5",   365.4,  61901450, 1.0, True,  0, 1.0, 1.0, 1),
                    #DataSet(dataPath + "/trainingTuple_*_division_*_stealth_stop_*_SYY_training_0.h5",   365.4,  61901450, 1.0, True,  0, 1.0, 1.0, 1),
  ]

  backgroundDataSets = [DataSet(dataPath + "/trainingTuple_*_division_0_TT_training_0.h5",    365.4,  61878989, 1.0, False, 0, 1.0, 1.0, len(signalDataSets)),]

  dm = DataManager(options.netOp.vNames, nEpoch, nFeatures, nLabels, nDomain, nWeights, options.runOp.ptReweight, signalDataSets, backgroundDataSets)

  # Build the graph
  denseNetwork = [nFeatures]+options.netOp.denseLayers+[nLabels]
  convLayers = options.netOp.convLayers
  rnnNodes = options.netOp.rnnNodes
  rnnLayers = options.netOp.rnnLayers
  mlp = CreateModel(options, denseNetwork, convLayers, rnnNodes, rnnLayers, dm.inputDataQueue, MiniBatchSize, mins, 1.0/ptps)

  #summary writer
  summary_path = "/storage/local/data1/gpuscratch/%s"%(USER)
  os.makedirs(summary_path)
  summary_writer = tf.summary.FileWriter(summary_path + "/log_graph", graph=tf.get_default_graph())

  print "TRAINING NETWORK"

  with tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=8) ) as sess:
    sess.run(tf.global_variables_initializer())

    #start queue runners
    dm.launchQueueThreads(sess)

    print "Reporting validation loss every %i batches with %i events per batch for %i epochs"%(ReportInterval, MiniBatchSize, nEpoch)

    #preload the first data into staging area
    sess.run([mlp.stagingOp], feed_dict={mlp.reg: l2Reg, mlp.keep_prob:options.runOp.keepProb})

    i = 0
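    #write training summaries every N_TRAIN_SUMMARY batches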
    N_TRAIN_SUMMARY = 10

    #flush the queue until the signal/background fractions are approximately equal
    while dm.continueTrainingLoop():
      result = sess.run(dm.inputDataQueue.dequeue_many(MiniBatchSize))
      signalFraction =  result[1][:,0].sum()/MiniBatchSize
      #the first time this fraction drops below 0.5 we are close enough to an equal signal/bg fraction
      if signalFraction < 0.5:
        break

    try:
      while dm.continueTrainingLoop():
        grw = 1.0#1*(2/(1+exp(-i/10000.0)) - 1) #2/(1+exp(-i/10000.0)) - 1 #1000000000000.0*(2/(1+exp(-i/500000.0)) - 1)

        #run validation operations 
        if i == 0 or not i % ReportInterval:
          #run validation operations 
          validation_loss, accuracy, summary_vl = sess.run([mlp.loss_ph, mlp.accuracy, mlp.merged_valid_summary_op], feed_dict={mlp.x_ph: validDataTTbar["data"][:validationCount], mlp.y_ph_: validDataTTbar["labels"][:validationCount], mlp.p_ph_: validDataTTbar["domain"][:validationCount], mlp.reg: l2Reg, mlp.gradientReversalWeight:grw, mlp.wgt_ph: validDataTTbar["weights"][:validationCount]})
          summary_writer.add_summary(summary_vl, i/N_TRAIN_SUMMARY)
        
          print('Interval %d, validation accuracy %0.6f, validation loss %0.6f' % (i/ReportInterval, accuracy, validation_loss))
        
          validation_loss, accuracy, summary_vl_QCDMC = sess.run([mlp.loss_ph, mlp.accuracy, mlp.merged_valid_QCDMC_summary_op], feed_dict={mlp.x_ph: validDataQCDMC["data"][:validationCount], mlp.y_ph_: validDataQCDMC["labels"][:validationCount], mlp.p_ph_: validDataQCDMC["domain"][:validationCount], mlp.reg: l2Reg, mlp.gradientReversalWeight:grw, mlp.wgt_ph: validDataQCDMC["weights"][:validationCount]})
          summary_writer.add_summary(summary_vl_QCDMC, i/N_TRAIN_SUMMARY)
        
          validation_loss, accuracy, summary_vl_QCDData = sess.run([mlp.loss_ph, mlp.accuracy, mlp.merged_valid_QCDData_summary_op], feed_dict={mlp.x_ph: validDataQCDData["data"][:validationCount], mlp.y_ph_: validDataQCDData["labels"][:validationCount], mlp.p_ph_: validDataQCDData["domain"][:validationCount], mlp.reg: l2Reg, mlp.gradientReversalWeight:grw, mlp.wgt_ph: validDataQCDData["weights"][:validationCount]})
          summary_writer.add_summary(summary_vl_QCDData, i/N_TRAIN_SUMMARY)

          #print(sess.run(mlp.x))

        #run training operations 
        if i % N_TRAIN_SUMMARY == 0:
          _, _, summary = sess.run([mlp.stagingOp, mlp.train_step, mlp.merged_train_summary_op], feed_dict={mlp.reg: l2Reg, mlp.keep_prob:options.runOp.keepProb, mlp.training: True, mlp.gradientReversalWeight:grw})
          summary_writer.add_summary(summary, i/N_TRAIN_SUMMARY)
        else:
          sess.run([mlp.stagingOp, mlp.train_step], feed_dict={mlp.reg: l2Reg, mlp.keep_prob:options.runOp.keepProb, mlp.training: True})
        i += 1

      #Should fix bad end of training state
      while dm.continueFlushingQueue():
        sess.run(dm.inputDataQueue.dequeue_many(MiniBatchSize))

    except Exception, e:
      # Report exceptions to the coordinator.
      dm.requestStop(e)
    finally:
Example #11
 def __init__(self):
     """Default constructor"""
     AssociationAnalysis.__init__(self)
     self.connFactory = DBUtil.ConnectionFactory()
     # Default connection source
     self.dataManager = DataManager()
Example #12
    def exitValue(self, ctx):
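        # Allocate an LLVM value for the literal, identifier, array, or struct
        # element this node holds and attach the resulting DataManager to ctx.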
        if ctx.ID() is not None:
            if ctx.ID().getText() in self.memory:
                ctx.data_manager = self.memory[ctx.ID().getText()]
            elif ctx.ID().getText() in self.functions:
                value = LLVMGenerator.call_fun(ctx.ID().getText())
                llvm_name = "%" + str(LLVMGenerator.str_i)
                python_name = llvm_name
                var_type = "i32"
                size = "4"
                is_const = True
                data_manager = DataManager(llvm_name=llvm_name,
                                           python_name=python_name,
                                           var_type=var_type,
                                           size=size,
                                           is_const=is_const)
                ctx.data_manager = data_manager
                self.memory[python_name] = data_manager
                LLVMGenerator.allocate(data_manager=data_manager, value=value)
            else:
                raise RuntimeError("Variable: '" + str(ctx.ID().getText()) +
                                   "' not recognized")
        if ctx.INT() is not None:
            llvm_name = "%" + str(LLVMGenerator.str_i)
            python_name = llvm_name
            var_type = "i32"
            size = "4"
            is_const = True
            data_manager = DataManager(llvm_name=llvm_name,
                                       python_name=python_name,
                                       var_type=var_type,
                                       size=size,
                                       is_const=is_const)
            ctx.data_manager = data_manager
            self.memory[python_name] = data_manager
            value = ctx.INT().getText()
            LLVMGenerator.allocate(data_manager=data_manager, value=value)

        if ctx.DOUBLE() is not None:
            llvm_name = "%" + str(LLVMGenerator.str_i)
            python_name = llvm_name
            var_type = "double"
            size = "8"
            is_const = True
            data_manager = DataManager(llvm_name=llvm_name,
                                       python_name=python_name,
                                       var_type=var_type,
                                       size=size,
                                       is_const=is_const)
            ctx.data_manager = data_manager
            self.memory[python_name] = data_manager
            value = ctx.DOUBLE().getText()
            LLVMGenerator.allocate(data_manager=data_manager, value=value)

        if ctx.STRING() is not None:
            llvm_name = "@str" + str(LLVMGenerator.str_i)
            python_name = llvm_name
            string = ctx.STRING().getText()[1:-1]
            length = len(string) + 2
            var_type = "[{} x i8]".format(length)
            size = "1"
            is_const = True
            data_manager = DataManager(llvm_name=llvm_name,
                                       python_name=python_name,
                                       var_type=var_type,
                                       size=size,
                                       is_const=is_const,
                                       length=length)
            ctx.data_manager = data_manager
            self.memory[python_name] = data_manager
            LLVMGenerator.allocate_string(data_manager=data_manager,
                                          string=string)

        if ctx.arr() is not None:
            llvm_name = "%" + str(LLVMGenerator.str_i)
            python_name = llvm_name
            length = len(ctx.arr().value())
            sub_type = ctx.arr().value()[0].data_manager.var_type
            var_type = "[{} x {}]".format(length, sub_type)
            size = ctx.arr().value()[0].data_manager.size
            is_const = True

            data_manager = DataManager(llvm_name=llvm_name,
                                       python_name=python_name,
                                       var_type=var_type,
                                       size=size,
                                       is_const=is_const,
                                       sub_type=sub_type,
                                       length=length)
            ctx.data_manager = data_manager
            self.memory[python_name] = data_manager
            values = ctx.arr().value()
            LLVMGenerator.allocate_array(data_manager=data_manager,
                                         values=values)

        if ctx.arr_element() is not None:
            if ctx.arr_element().ID().getText() in self.memory:
                id_dm = self.memory[ctx.arr_element().ID().getText()]
                value_dm = ctx.arr_element().value().data_manager
                llvm_name = LLVMGenerator.get_elem(id_dm=id_dm,
                                                   value_dm=value_dm)
                python_name = llvm_name
                var_type = id_dm.sub_type
                size = id_dm.size
                is_const = True
                data_manager = DataManager(llvm_name=llvm_name,
                                           python_name=python_name,
                                           var_type=var_type,
                                           size=size,
                                           is_const=is_const)
                self.memory[python_name] = data_manager
                ctx.data_manager = data_manager
            else:
                raise RuntimeError("Array not recognized")

        if ctx.struct_elem() is not None:
            id_1 = ctx.struct_elem().ID()[0].getText()
            id_2 = ctx.struct_elem().ID()[1].getText()
            struct = self.structures_obj[id_1]
            parent = struct.parent
            index = parent.get_number(id_2)
            id = LLVMGenerator.get_struct_elem(parent, struct, index)

            llvm_name = id
            python_name = llvm_name
            var_type = parent.types[index]
            size = 4
            is_const = False
            data_manager = DataManager(llvm_name, python_name, var_type, size,
                                       is_const)
            ctx.data_manager = data_manager
Example #13
    from xbos.services.hod import HodClient
    from xbos.devices.thermostat import Thermostat

    hc = HodClient("xbos/hod", c)

    q = """SELECT ?uri ?zone FROM %s WHERE {
				?tstat rdf:type/rdfs:subClassOf* brick:Thermostat .
				?tstat bf:uri ?uri .
				?tstat bf:controls/bf:feeds ?zone .
				};""" % cfg["Building"]
    import pickle

    with open("../Thermal Data/ciee_thermal_data_demo", "r") as f:
        thermal_data = pickle.load(f)
    dm = DataManager(cfg, advise_cfg, c, ZONE)
    tstat_query_data = hc.do_query(q)['Rows']
    tstats = {tstat["?zone"]: Thermostat(c, tstat["?uri"]) for tstat in tstat_query_data}

    # TODO INTERVAL SHOULD NOT BE IN config_file.yml, THERE SHOULD BE A DIFFERENT INTERVAL FOR EACH ZONE
    from ThermalModel import *

    thermal_model = MPCThermalModel(thermal_data, interval_length=cfg["Interval_Length"])
    thermal_model.setZoneTemperaturesAndFit(
        {dict_zone: dict_tstat.temperature for dict_zone, dict_tstat in tstats.items()}, dt=cfg["Interval_Length"])
    thermal_model.setWeahterPredictions(dm.weather_fetch())

    adv = Advise(["HVAC_Zone_Centralzone"],
                 datetime.datetime.utcnow().replace(tzinfo=pytz.utc).astimezone(
                     tz=pytz.timezone("America/Los_Angeles")),
                 dm.preprocess_occ(),
Example #14
    def generate_theoretical_data(self,
                                  ticker_tgt,
                                  ticker_src,
                                  step=0.00005,
                                  pos_adj=None,
                                  neg_adj=None):
        """Generates theoretical data for a stock based on another
        stock.

        Given two tickers, a granularity/precision step, and manual
        offset/adjustments, generates more data for the first stock
        (gen) to match the length of data in the second stock (src).
        The generation is based on averages in existing real data and
        assumes an existing correlation between two stocks (e.g. UPRO
        and SPY supposedly have a correlation, or leverage factor of 3)

        Args:
            ticker_tgt: A ticker of the stock for which data should be
                generated, i.e. the target for the generation
            ticker_src: A ticker of the stock to be used as the data
                source to aid in data generation.
                NOTE: This implies the source data should be longer
                than the data for the stock for which the generation
                occurs
            step: A value corresponding to a level of precision, or the
                number of averages calculated and then used to generate
                the data. NOTE: precision != accuracy and a default
                value of 0.00005 is used if one is not given, based on
                testing done on different values
            pos_adj: A value to be used when adjusting movements in the
                positive direction, i.e. a higher value will lead to
                more pronounced positive moves (default: None, if None
                a hardcoded default value will be used depending on
                the ticker, typically 0)
            neg_adj: A value to be used when adjusting movements in the
                negative direction, i.e. a higher value will lead to
                more pronounced negative moves (default: None, if None
                a hardcoded default value will be used depending on
                the ticker, typically 0)

        Returns:
            A tuple of price LUTs, one LUT containing real data
            appended to a part of the generated data, the other
            containing a full set of generated data. The former is
            intended to be used in backtesting strategies, while the
            latter is intended to be used for verifying generation
            accuracy against existing real data.
        """
        db = DataManager()
        # get prices for tickers
        price_lut_tgt = db.build_price_lut(ticker_tgt)
        price_lut_src = db.build_price_lut(ticker_src)
        # before doing any calculations, check if all data is on disk already
        # NOTE: feature disabled for now, as it didn't respond to changes
        # price_lut_gen_part = db.build_price_lut(ticker_tgt + '--GEN-PART')
        # price_lut_gen_full = db.build_price_lut(ticker_tgt + '--GEN-FULL')
        # if (len(price_lut_gen_part) == len(price_lut_src)
        #         and len(price_lut_gen_full) == len(price_lut_src)):
        #     return (price_lut_gen_part, price_lut_gen_full)
        # sorted dates needed later
        src_dates = sorted(price_lut_src.keys())
        gen_dates = sorted(price_lut_tgt.keys())
        # part of data will be real data
        price_lut_gen_part = price_lut_tgt.copy()
        # fully generated data needs a real point as an anchor
        price_lut_gen_full = {gen_dates[0]: price_lut_tgt[gen_dates[0]]}
        # a set of adjustments to use if not otherwise specified
        adjustments = {
            'UPRO': (0, 0),
            'TMF': (0.01, 0.05),
            'TQQQ': (0.025, 0),
            'UDOW': (0, 0.01)
        }
        if step == 0.00005 and pos_adj is None and neg_adj is None:
            try:
                pos_adj = adjustments[ticker_tgt.upper()][0]
                neg_adj = adjustments[ticker_tgt.upper()][1]
            except KeyError:
                pos_adj = 0
                neg_adj = 0
        # calculate % movements and leverage ratio, to use for the SA-LUT
        moves = {}
        ratios = {}
        for i in range(len(gen_dates) - 1):
            change_src = (
                price_lut_src[gen_dates[i + 1]] / price_lut_src[gen_dates[i]] -
                1)
            change_gen = (
                price_lut_tgt[gen_dates[i + 1]] / price_lut_tgt[gen_dates[i]] -
                1)
            moves[gen_dates[i + 1]] = change_src
            if change_src == 0:
                ratios[gen_dates[i + 1]] = 0.0
            else:
                ratios[gen_dates[i + 1]] = change_gen / change_src
        sa_lut = SteppedAvgLookup(step, [moves[d] for d in gen_dates[1:]],
                                  [ratios[d] for d in gen_dates[1:]])
        # generate data going forward from gen data's anchor point
        for i in range(len(gen_dates) - 1):
            move = moves[gen_dates[i + 1]]
            if move >= 0:
                adj = pos_adj
            else:
                adj = neg_adj
            price_lut_gen_full[gen_dates[i + 1]] = \
                (price_lut_gen_full[gen_dates[i]]
                 * (move * (sa_lut.get(move) + adj) + 1))
        # generate data going backwards from gen data's anchor point
        for i in range(len(src_dates) - len(gen_dates) - 1, -1, -1):
            move = (
                price_lut_src[src_dates[i + 1]] / price_lut_src[src_dates[i]] -
                1)
            if move >= 0:
                adj = pos_adj
            else:
                adj = neg_adj
            gen_price = (price_lut_gen_full[src_dates[i + 1]] /
                         (move * (sa_lut.get(move) + adj) + 1))
            price_lut_gen_full[src_dates[i]] = gen_price
            price_lut_gen_part[src_dates[i]] = gen_price
        # save data to disk for faster retrieval next time
        db.write_stock_data(
            ticker_tgt + '--GEN-FULL',
            [[date, '-', '-', '-',
              str(price_lut_gen_full[date]), '-']
             for date in src_dates], False)
        db.write_stock_data(
            ticker_tgt + '--GEN-PART',
            [[date, '-', '-', '-',
              str(price_lut_gen_part[date]), '-']
             for date in src_dates], False)
        return (price_lut_gen_part, price_lut_gen_full)
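
A minimal usage sketch for generate_theoretical_data; the enclosing class is not shown in this excerpt, so TheoreticalDataGenerator below is only a placeholder name:

# Hypothetical usage -- the class name is an assumption, not part of the excerpt.
gen = TheoreticalDataGenerator()
# Generate UPRO-like prices from the longer SPY history, using the default step and adjustments.
lut_backtest, lut_generated = gen.generate_theoretical_data("UPRO", "SPY")
print(len(lut_backtest), len(lut_generated))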
Example #15
        print("epoch ", e, ": dev F1: ", devF1, ", test F1: ", testF1)
        f.write("epoch "+ str(e)+ ": dev F1: "+ str(devF1)+ ", test F1: "+ str(testF1)+ "\n")
        f.close()
        torch.save(model, "checkpoints/model_"+args.logfile+"_"+str(e))

if __name__ == "__main__":
    torch.manual_seed(1)
    if not os.path.exists('checkpoints'):
        os.mkdir('checkpoints')

    argv = sys.argv[1:]
    parser = Parser().getParser()
    args, _ = parser.parse_known_args(argv)

    print("Load data start...")
    dm = DataManager(args.datapath, args.testfile)
    wv = dm.vector

    train_data, test_data, dev_data = dm.data['train'], dm.data['test'], dm.data['dev']
    print("train_data count: ", len(train_data))
    print("test_data  count: ", len(test_data))
    print("dev_data   count: ", len(dev_data))

    model = Model(args.lr, args.dim, args.statedim, wv, dm.relation_count)
    model.cuda()
    if args.start != '':
        pretrain_model = torch.load(args.start) 
        model_dict = model.state_dict() 
        pretrained_dict = pretrain_model.state_dict() 
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 
        model_dict.update(pretrained_dict) 
Example #16
    parser.add_argument('--grained', type=int, default=3)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--lr_word_vector', type=float, default=0.1)
    parser.add_argument('--epoch', type=int, default=25)
    parser.add_argument('--batch', type=int, default=25)
    parser.add_argument('--patience', type=int, default=5)

    args, _ = parser.parse_known_args(argv)

    fold = args.fold
    seed = args.seed
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

    data = DataManager(args.dataset, args.seed, grained=3)

    wordlist = data.gen_word()
    train_data, dev_data, test_data = data.gen_data(args.grained)
    print 'Data Generated'

    model = Model(wordlist, argv, len(data.dict_target))
    print 'model instantiated'

    batch_n = (len(train_data) - 1) / args.batch + 1
    optimizer = OptimizerList[args.optimizer](model.params, args.lr,
                                              args.lr_word_vector)
    details = {'loss': [], 'loss_train':[], 'loss_dev':[], 'loss_test':[], \
            'acc_train':[], 'acc_dev':[], 'acc_test':[], 'loss_l2':[]}

    patience = args.patience
Example #17
        skip_first = 0
        for pair, df in self.returns_dict.items():
            if skip_first == 0:
                skip_first = 1
                continue
            combined_returns = pd.concat([combined_returns, df],
                                         ignore_index=True,
                                         axis=0)
        self.combined_returns = combined_returns
        self.total_returns = 1
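        # Compound the per-period returns into a single cumulative return factor.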
        for returns in combined_returns['returns'].values:
            self.total_returns = self.total_returns * (1 + returns)


if __name__ == "__main__":
    dm = DataManager()
    # This code will just do it for one sector
    # x.data = x.getOneSector(sector="Energy", fromDate="2015-01-01", toDate="2016-09-21")
    dm.getOneSector(sector="Energy",
                    fromDate="2013-01-01",
                    toDate="2015-01-01")
    # x.calcReturns()

    strat = CointStrategyStopLoss
    bt = Backtester(strat, dm.data)
    bt.backtest()
    bt.plot_stuff()
    # bt.strat.CA.plot_pair(['MA','V'], fromDate="2014-01-01", toDate="2018-01-01")
    print(bt.total_returns)
    plt.show()
Example #18
    # handle downloading from a list of tickers
    if args.download:
        for ticker in args.download:
            download_and_write(ticker, args.using)
        exit()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Downloader for historical stock data.")
    parser.add_argument('--using',
                        default='google',
                        nargs=1,
                        help=('a source/API from which to get the data, '
                              'default: google'))
    download_group = parser.add_mutually_exclusive_group(required=True)
    download_group.add_argument('--download',
                                nargs='+',
                                help='the stock ticker(s) to download')
    download_group.add_argument('--download-from',
                                nargs='+',
                                help=('file(s) containing the stock tickers '
                                      'to download'))

    downloader = Downloader()
    db = DataManager()

    main()
    print("Did nothing.")
    exit()
Example #19
    parser.add_argument('--fast', type=int, choices=[0, 1], default=0)
    parser.add_argument('--screen', type=int, choices=[0, 1], default=0)
    parser.add_argument('--optimizer', type=str, default='ADAGRAD')
    parser.add_argument('--grained', type=int, default=2)
    parser.add_argument('--lr', type=float, default=0.0001)
    parser.add_argument('--lr_word_vector', type=float, default=0.000007)
    parser.add_argument('--epoch', type=int, default=25)
    parser.add_argument('--batch', type=int, default=10)
    parser.add_argument('--doc_num', type=int, default=50000)
    #parser.add_argument('--reload', type=str, default=True)
    parser.add_argument('--saveto', type=str, default='best_model17.pkl')
    parser.add_argument('--reload_dic', type=str, default=False)
    #parser.add_argument('--reload_dic', type=str, default='dic.pkl')
    args, _ = parser.parse_known_args(argv)
    random.seed(args.seed)
    data = DataManager(args.dataset)
    if args.reload_dic:
        print('reloading dictionary...')
        wordlist = data.load_word(args.reload_dic)

    else:
        print('building dictionary...')
        wordlist = data.gen_word()
        print('saving dictionary...')
        pkl.dump(wordlist, open('dic.pkl', 'wb'), -1)
    print('%d unique words in total' % len(wordlist))
    train_data, test_data = data.gen_data(args.grained)
    random.shuffle(train_data)
    num = int(len(train_data) * 0.11)
    dev_data = train_data[:num]
    train_data_new = train_data[num:]
Example #20
def get_all():
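    # View handler (Flask-style): return all data from the DataManager as JSON.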
    return jsonify(DataManager().get_all_data())
Example #21
parser.add_argument('--interval', type=int, default=10)

# Parse the configured arguments
args, _ = parser.parse_known_args(argv)

# Configure the log file format
logging.basicConfig(
    filename=('log/%s.log' % args.name) * (1 - args.screen),
    level=logging.DEBUG,
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(message)s',
    datefmt='%H:%M:%S')

# Load the corpus text and the sentiment, negation, and intensifier word lists
dm = DataManager(
    args.dataset, {
        'negation': 'negation.txt',
        'intensifier': 'intensifier.txt',
        'sentiment': 'sentiment.txt'
    })

# Extract words of each category from the raw corpus
dm.gen_word_list()
# Convert the words to numeric lists and build the training, validation, and test sets
dm.gen_data()

# Build the model
model = Model(dm.words, dm.grained, argv)
# Instantiate the evaluator
Evaluator = EvaluatorList[dm.grained]


def do_train(label, data):
Example #22
0
def get_node(node):
    res = DataManager().get_node(node, request.json)
    
    return jsonify(res)
Example #23
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, BatchNormalization, Flatten, Reshape
from keras import regularizers
from keras.preprocessing.image import ImageDataGenerator
from keras import utils
from DataManager import DataManager

print("Loading training data...")
dm = DataManager(random_state=0)
training_data, training_labels = dm.loadTrainingData()
testing_data, testing_labels = dm.loadTestingData()
validation_data, validation_labels = dm.loadValidationData()

print('Loaded shapes')
for i in training_data, training_labels, testing_data, testing_labels, validation_data, validation_labels:
    print(i.shape)

input_shape = tuple(training_data.shape[1:])
num_classes = len(np.unique(training_labels))
print("input_shape: {}".format(input_shape))
print("num_classes: {}".format(num_classes))

# Convert to categorical classes
training_labels = utils.to_categorical(training_labels, num_classes)
testing_labels = utils.to_categorical(testing_labels, num_classes)
validation_labels = utils.to_categorical(validation_labels, num_classes)

data_generator = ImageDataGenerator(featurewise_center=True,
                                    featurewise_std_normalization=True,
                                    rotation_range=20,
Example #24
def create(node):
    if not request.json:
        abort(400)

    return jsonify(DataManager().add_row(node,request.json))
Example #25
def mainTF(options):

    import tensorflow as tf
    from CreateModel import CreateModel
    from DataManager import DataManager
    from DataSet import DataSet

    print "PROCESSING VALIDATION DATA"

    dgSig = DataGetter.DefinedVariables(options.netOp.vNames, signal=True)
    dgBg = DataGetter.DefinedVariables(options.netOp.vNames, background=True)

    validDataSig = [
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6p1/trainingTuple_0_division_1_TTbarSingleLepT_validation_0.h5",
          ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6p1/trainingTuple_0_division_1_TTbarSingleLepTbar_validation_0.h5",
          ), 1)
    ]

    validDataBgTTbar = [
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_TTbarSingleLepT_validation_0.h5",
          ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_TTbarSingleLepTbar_validation_0.h5",
          ), 1),
    ]

    validDataBgQCDMC = [
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT100to200_validation_0.h5",
          ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT200to300_validation_0.h5",
          ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT300to500_validation_0.h5",
          ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT500to700_validation_0.h5",
          ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT700to1000_validation_0.h5",
          ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT1000to1500_validation_0.h5",
          ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT1500to2000_validation_0.h5",
          ), 1),
        (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT2000toInf_validation_0.h5",
          ), 1)
    ]

    validDataBgQCDData = [((
        "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_Data_JetHT_2016_validation_0.h5",
    ), 1)]

    print "Input Variables: ", len(dgSig.getList())

    # Import data
    #print options.runOp.validationSamples

    validDataSig = getValidData(dgSig, validDataSig, options)
    validDataBgTTbar = getValidData(dgBg, validDataBgTTbar, options)
    validDataBgQCDMC = getValidData(dgBg, validDataBgQCDMC, options)
    validDataBgQCDData = getValidData(dgBg, validDataBgQCDData, options)

    validDataTTbar = combineValidationData(validDataSig, validDataBgTTbar)
    validDataQCDMC = combineValidationData(validDataSig, validDataBgQCDMC)
    validDataQCDData = combineValidationData(validDataSig, validDataBgQCDData)

    #get input/output sizes
    #print validData["data"].shape
    nFeatures = validDataTTbar["data"].shape[1]
    nLabels = validDataTTbar["labels"].shape[1]
    nWeights = validDataTTbar["weights"].shape[1]

    #Training parameters
    l2Reg = options.runOp.l2Reg
    MiniBatchSize = options.runOp.minibatchSize
    nEpoch = options.runOp.nepoch
    ReportInterval = options.runOp.reportInterval
    validationCount = min(options.runOp.nValidationEvents,
                          validDataTTbar["data"].shape[0])

    #scale data inputs to mean 0, stddev 1
    categories = numpy.array(options.netOp.vCategories)
    mins = numpy.zeros(categories.shape, dtype=numpy.float32)
    ptps = numpy.zeros(categories.shape, dtype=numpy.float32)
    for i in xrange(categories.max()):
        selectedCategory = categories == i
        mins[selectedCategory] = validDataTTbar["data"][:, selectedCategory].mean()
        ptps[selectedCategory] = validDataTTbar["data"][:, selectedCategory].std()
    ptps[ptps < 1e-10] = 1.0

    ##Create data manager, this class controls how data is fed to the network for training
    #                 DataSet(fileGlob, xsec, Nevts, kFactor, sig, prescale, rescale)
    signalDataSets = [
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6p1/trainingTuple_*_division_0_TTbarSingleLepT_training_*.h5",
            365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 8),
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6p1/trainingTuple_*_division_0_TTbarSingleLepTbar_training_*.h5",
            365.4, 61901450, 1.0, True, 0, 1.0, 1.0, 8),
    ]

    #pt reweighting histograms
    ttbarRatio = (numpy.array([
        0.7976347, 1.010679, 1.0329635, 1.0712056, 1.1147588, 1.0072196,
        0.79854023, 0.7216115, 0.7717652, 0.851551, 0.8372917
    ]),
                  numpy.array([
                      0., 50., 100., 150., 200., 250., 300., 350., 400., 450.,
                      500., 1e10
                  ]))
    QCDDataRatio = (numpy.array([
        0.50125164, 0.70985824, 1.007087, 1.6701245, 2.5925348, 3.6850858,
        4.924969, 6.2674766, 7.5736594, 8.406105, 7.7529635
    ]),
                    numpy.array([
                        0., 50., 100., 150., 200., 250., 300., 350., 400.,
                        450., 500., 1e10
                    ]))
    QCDMCRatio = (numpy.array([
        0.75231355, 1.0563549, 1.2571484, 1.3007764, 1.0678109, 0.83444154,
        0.641499, 0.49130705, 0.36807108, 0.24333349, 0.06963781
    ]),
                  numpy.array([
                      0., 50., 100., 150., 200., 250., 300., 350., 400., 450.,
                      500., 1e10
                  ]))
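    #each ratio above is a (per-bin reweighting factor, pT bin edges in GeV) pair with a
    #large overflow edge; ttbarRatio is attached to the ttbar background DataSets below,
    #while QCDDataRatio and QCDMCRatio are currently disabled (commented out in the
    #corresponding DataSet calls)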

    backgroundDataSets = [
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_TTbarSingleLepT_training_*.h5",
            365.4, 61878989, 1.0, False, 0, 1.0, 1.0, 8, ttbarRatio),
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_TTbarSingleLepTbar_training_*.h5",
            365.4, 61901450, 1.0, False, 0, 1.0, 1.0, 8, ttbarRatio),
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_Data_JetHT_2016_training_*.h5",
            1.0,
            1,
            1.0,
            False,
            1,
            1.0,
            1.0,
            8,
            include=False),  #QCDDataRatio),
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT100to200_training_*.h5",
            27990000,
            80684349,
            0.0,
            False,
            2,
            1.0,
            1.0,
            1,
            include=False),  #QCDMCRatio), 
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT200to300_training_*.h5",
            1712000,
            57580393,
            0.0,
            False,
            2,
            1.0,
            1.0,
            1,
            include=False),  #QCDMCRatio),
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT300to500_training_*.h5",
            347700,
            54537903,
            0.0,
            False,
            2,
            1.0,
            1.0,
            1,
            include=False),  #QCDMCRatio),
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT500to700_training_*.h5",
            32100,
            62271343,
            0.0,
            False,
            2,
            1.0,
            1.0,
            1,
            include=False),  #QCDMCRatio),
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT700to1000_training_*.h5",
            6831,
            45232316,
            0.0,
            False,
            2,
            1.0,
            1.0,
            1,
            include=False),  #QCDMCRatio),
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT1000to1500_training_*.h5",
            1207,
            15127293,
            0.0,
            False,
            2,
            1.0,
            1.0,
            1,
            include=False),  #QCDMCRatio),
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT1500to2000_training_*.h5",
            119.9,
            11826702,
            0.0,
            False,
            2,
            1.0,
            1.0,
            1,
            include=False),  #QCDMCRatio),
        DataSet(
            "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT2000toInf_training_*.h5",
            25.24,
            6039005,
            0.0,
            False,
            2,
            1.0,
            1.0,
            1,
            include=False),  #QCDMCRatio),
    ]

    dm = DataManager(options.netOp.vNames, nEpoch, nFeatures, nLabels, 2,
                     nWeights, options.runOp.ptReweight, signalDataSets,
                     backgroundDataSets)
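    #dm owns the input pipeline: it fills dm.inputDataQueue with batches drawn from the
    #signal and background DataSets above; that queue feeds the model's staging op during
    #training and is dequeued directly during the flushing phases below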

    # Build the graph
    denseNetwork = [nFeatures] + options.netOp.denseLayers + [nLabels]
    convLayers = options.netOp.convLayers
    rnnNodes = options.netOp.rnnNodes
    rnnLayers = options.netOp.rnnLayers
    mlp = CreateModel(options, denseNetwork, convLayers, rnnNodes, rnnLayers,
                      dm.inputDataQueue, MiniBatchSize, mins, 1.0 / ptps)

    #summary writer
    summary_writer = tf.summary.FileWriter(options.runOp.directory +
                                           "log_graph",
                                           graph=tf.get_default_graph())

    print "TRAINING NETWORK"

    with tf.Session(config=tf.ConfigProto(
            intra_op_parallelism_threads=8)) as sess:
        sess.run(tf.global_variables_initializer())

        #start queue runners
        dm.launchQueueThreads(sess)

        print "Reporting validation loss every %i batches with %i events per batch for %i epochs" % (
            ReportInterval, MiniBatchSize, nEpoch)

        #preload the first batch into the staging area
        sess.run([mlp.stagingOp],
                 feed_dict={
                     mlp.reg: l2Reg,
                     mlp.keep_prob: options.runOp.keepProb
                 })

        i = 0
        N_TRAIN_SUMMARY = 10
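        #a training summary is written every N_TRAIN_SUMMARY batches, and
        #i / N_TRAIN_SUMMARY is used as the global step when summaries are added below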

        #flush the queue until the signal and background fractions are approximately equal
        flushctr = 200
        while dm.continueTrainingLoop():
            result = sess.run(dm.inputDataQueue.dequeue_many(MiniBatchSize))
            signalCount = result[1][:, 0].sum()
            bgCount = result[1][:, 1].sum()
            signalFraction = signalCount / (signalCount + bgCount)
            #once this fraction has dropped below 0.5 for flushctr batches, the queue is close enough to an equal signal/bg mix
            if signalFraction < 0.5:
                flushctr -= 1
                if flushctr <= 0:
                    break

        try:
            while dm.continueTrainingLoop():

                grw = 2 / (1 + exp(-i / 10000.0)) - 1
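                #gradient reversal weight: 2 / (1 + exp(-i / 10000)) - 1 ramps smoothly from 0
                #toward 1 as training progresses (~0.46 after 10000 batches), so the
                #gradient-reversal term is phased in gradually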

                #at i = 0 and then every ReportInterval batches, run the validation operations on the ttbar, QCD MC, and QCD data samples
                if i == 0 or not i % ReportInterval:
                    validation_loss, accuracy, summary_vl = sess.run(
                        [
                            mlp.loss_ph, mlp.accuracy,
                            mlp.merged_valid_summary_op
                        ],
                        feed_dict={
                            mlp.x_ph: validDataTTbar["data"][:validationCount],
                            mlp.y_ph_:
                            validDataTTbar["labels"][:validationCount],
                            mlp.p_ph_:
                            validDataTTbar["domain"][:validationCount],
                            mlp.reg: l2Reg,
                            mlp.gradientReversalWeight: grw,
                            mlp.wgt_ph:
                            validDataTTbar["weights"][:validationCount]
                        })
                    summary_writer.add_summary(summary_vl, i / N_TRAIN_SUMMARY)

                    print(
                        'Interval %d, validation accuracy %0.6f, validation loss %0.6f'
                        % (i / ReportInterval, accuracy, validation_loss))

                    validation_loss, accuracy, summary_vl_QCDMC = sess.run(
                        [
                            mlp.loss_ph, mlp.accuracy,
                            mlp.merged_valid_QCDMC_summary_op
                        ],
                        feed_dict={
                            mlp.x_ph: validDataQCDMC["data"][:validationCount],
                            mlp.y_ph_:
                            validDataQCDMC["labels"][:validationCount],
                            mlp.p_ph_:
                            validDataQCDMC["domain"][:validationCount],
                            mlp.reg: l2Reg,
                            mlp.gradientReversalWeight: grw,
                            mlp.wgt_ph:
                            validDataQCDMC["weights"][:validationCount]
                        })
                    summary_writer.add_summary(summary_vl_QCDMC,
                                               i / N_TRAIN_SUMMARY)

                    validation_loss, accuracy, summary_vl_QCDData = sess.run(
                        [
                            mlp.loss_ph, mlp.accuracy,
                            mlp.merged_valid_QCDData_summary_op
                        ],
                        feed_dict={
                            mlp.x_ph:
                            validDataQCDData["data"][:validationCount],
                            mlp.y_ph_:
                            validDataQCDData["labels"][:validationCount],
                            mlp.p_ph_:
                            validDataQCDData["domain"][:validationCount],
                            mlp.reg:
                            l2Reg,
                            mlp.gradientReversalWeight:
                            grw,
                            mlp.wgt_ph:
                            validDataQCDData["weights"][:validationCount]
                        })
                    summary_writer.add_summary(summary_vl_QCDData,
                                               i / N_TRAIN_SUMMARY)

                #run training operations
                if i % N_TRAIN_SUMMARY == 0:
                    _, _, summary = sess.run(
                        [
                            mlp.stagingOp, mlp.train_step,
                            mlp.merged_train_summary_op
                        ],
                        feed_dict={
                            mlp.reg: l2Reg,
                            mlp.keep_prob: options.runOp.keepProb,
                            mlp.training: True,
                            mlp.gradientReversalWeight: grw
                        })
                    summary_writer.add_summary(summary, i / N_TRAIN_SUMMARY)
                else:
                    sess.run(
                        [mlp.stagingOp, mlp.train_step],
                        feed_dict={
                            mlp.reg: l2Reg,
                            mlp.keep_prob: options.runOp.keepProb,
                            mlp.training: True
                        })
                i += 1

            while dm.continueFlushingQueue():
                sess.run(dm.inputDataQueue.dequeue_many(MiniBatchSize))

        except Exception, e:
            # Report exceptions to the coordinator.
            dm.requestStop(e)
        finally:
Example #26
0
def update(node, row_id):
    if not request.json:
        abort(400)

    return jsonify(DataManager().edit_row(node, row_id, request.json))
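
For context, a handler like this is normally bound to a URL rule on a Flask application. The following is a minimal, hypothetical wiring sketch: the route path, HTTP method, and the DataManager import path are invented for illustration, and DataManager.edit_row(node, row_id, payload) is simply assumed to behave as in the call above.

# Sketch only: Flask wiring for the handler above.  The route path is illustrative and
# DataManager is assumed to be importable from a project-local module.
from flask import Flask, abort, jsonify, request
from DataManager import DataManager  # assumed project-local module path

app = Flask(__name__)

@app.route("/data/<node>/<int:row_id>", methods=["PUT"])
def update(node, row_id):
    # Reject requests without a JSON body.
    if not request.json:
        abort(400)
    # Delegate the edit to the project's DataManager and return its result as JSON.
    return jsonify(DataManager().edit_row(node, row_id, request.json))
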
Example #27
0
# ========== Instantiate NetManager ==========
net_cls = NetManager()

# ========== Instantiate PathManager ==========
path_cls = PathManager(tfrecord_folder=TFRECORD_FOLDER,
                       output_rootfolder=OUT_ROOT_FOLDER,
                       epoch_output_rootfolder=EPOCH_OUT_ROOT_FOLDER)
path_cls.all_makedirs()  # create the folders for saving results

# ========== Build the DataSets ==========
# load the property data
df = pd.read_csv(path_cls.get_property_path())
shuf_train_ds_cls = DataManager(
    tfrecord_path=path_cls.get_train_ds_path(),
    img_root=IMAGE_ROOT_PATH,
    batch_size=SHUF_LEARN_BATCH_SIZE,
    net_cls=net_cls,
    data_n=df.at[0, 'total_learn_data'],
    suffle_buffer=SUFFLE_BUFFER_SIZE,
)
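# The two DataManagers below read the same TFRecord files without a shuffle buffer, so
# they yield batches in file order (presumably for evaluation-style passes), whereas
# shuf_train_ds_cls above shuffles within a SUFFLE_BUFFER_SIZE-sized buffer for training.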
train_ds_cls = DataManager(
    tfrecord_path=path_cls.get_train_ds_path(),
    img_root=IMAGE_ROOT_PATH,
    batch_size=LEARN_BATCH_SIZE,
    net_cls=net_cls,
    data_n=df.at[0, 'total_learn_data'],
)
test_ds_cls = DataManager(
    tfrecord_path=path_cls.get_test_ds_path(),
    img_root=IMAGE_ROOT_PATH,
    batch_size=TEST_BATCH_SIZE,
    net_cls=net_cls,
Example #28
0
def delete(node, row_id):
    return jsonify(DataManager().delete(node, row_id))
Example #29
0
            return self.predictions[0][now_time]
        else:
            return self.predictions[now_time]


if __name__ == '__main__':
    import yaml
    import sys

    sys.path.insert(0, '..')
    from DataManager import DataManager
    from xbos import get_client

    with open("../config_file.yml", 'r') as ymlfile:
        cfg = yaml.load(ymlfile)

    with open("../Buildings/ciee/ZoneConfigs/HVAC_Zone_Eastzone.yml", 'r') as ymlfile:
        advise_cfg = yaml.load(ymlfile)

    if cfg["Server"]:
        c = get_client(agent=cfg["Agent_IP"], entity=cfg["Entity_File"])
    else:
        c = get_client()

    dm = DataManager(cfg, advise_cfg, c, "HVAC_Zone_Eastzone")

    occ = Occupancy(dm.preprocess_occ(), 15, 4, 4, advise_cfg["Advise"]["Occupancy_Sensors"])
    for i in range(10):
        print "Intervals ahead: " + str(i)
        print occ.occ(i)
Example #30
0
def main():
    '''
    Runs cross validation on the input Twitter data.
    '''
    args = parser.parse_args()

    # Extract the data for LDA and divide into 10 folds
    dm = DataManager(args.train_path, 'twitter')
    if settings.DEBUG: print("Loading data...")

    # Time the process of loading in the data.
    start = time.perf_counter()

    # Load the data (possibly from the cache, if it exists)
    dm.load_data(args.cache_path)
    # The number of folds is passed in as a command-line arg
    dm.divide_into_folds(args.num_folds)
    end = time.perf_counter()
    if settings.DEBUG:
        print(
            f"Preparing the data (loading, dividing into folds) took {end-start:0.4f} seconds."
        )

    # Initialize the best k and best likelihood, along with the list of k values to try
    best_k = None
    best_likelihood = -float("inf")

    # Get the list of topic numbers to try as a command line arg too.
    possible_k_values = args.topic_numbers

    # Store the results to the result path. Add the headers if the file doesn't exist yet.
    if not os.path.exists(args.results_path):
        fout = open(args.results_path, "w")
        out_writer = csv.writer(fout)
        out_writer.writerow([
            "Model", "k", "Average Likelihood", "Number of Documents", "Source"
        ])
    else:
        fout = open(args.results_path, "a")  # append so the existing header and rows are kept
        out_writer = csv.writer(fout)

    # Run cross validation once for each parameter value
    for k in possible_k_values:

        if settings.DEBUG: print(f"Trying k={k} components...")

        # We will create a list of accuracies for each validation set
        likelihoods = []
        for i in range(dm.get_num_folds()):
            if settings.DEBUG:
                print(f"    Iteration {i+1}/{dm.get_num_folds()}")

            # Update the validation fold.
            dm.set_validation(i)

            # Retrieve the training data and validation set.
            train, validate = get_data_for_LDA(dm)
            start = time.perf_counter()
            # Train the model with the param choice.
            lda_model = run_LDA_for_CV(train, k)
            # Compute the resulting accuracy on the validation set.
            likelihood = lda_model.score(validate)
            end = time.perf_counter()
            if settings.DEBUG: print(f"        likelihood = {likelihood}")
            if settings.DEBUG:
                print(f"        Training took {end-start:0.4f} seconds.")

            likelihoods.append(likelihood)

        avg_likelihood = sum(likelihoods) / len(likelihoods)
        out_writer.writerow([
            "LDA", k, avg_likelihood,
            len(dm.get_all_fold_data()), settings.TWITTER_DIR
        ])
        if settings.DEBUG: print(f"    avg_likelihood = {avg_likelihood}")

        if avg_likelihood > best_likelihood:
            best_likelihood = avg_likelihood
            best_k = k

    print(
        f"Best average likelihood found was {best_likelihood} with parameter value k={best_k}"
    )
    fout.close()
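
The helpers get_data_for_LDA and run_LDA_for_CV are not shown in this excerpt. A plausible minimal implementation on top of scikit-learn could look like the sketch below; the DataManager accessors used in it (get_train_data, get_validation_data) are hypothetical stand-ins for whatever the project's class actually exposes, and lda_model.score in the loop above would then be scikit-learn's approximate log-likelihood score.

# Sketch only, under the assumptions stated above; not taken from the original project.
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer


def get_data_for_LDA(dm):
    '''Vectorize the current training and validation folds into bag-of-words matrices.'''
    vectorizer = CountVectorizer(max_features=5000, stop_words="english")
    train = vectorizer.fit_transform(dm.get_train_data())        # hypothetical accessor
    validate = vectorizer.transform(dm.get_validation_data())    # hypothetical accessor
    return train, validate


def run_LDA_for_CV(train, k):
    '''Fit an LDA model with k topics on the vectorized training fold.'''
    lda = LatentDirichletAllocation(n_components=k,
                                    learning_method="online",
                                    random_state=0)
    return lda.fit(train)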