Example #1
    def prepare(self):
        self.base_url = 'http://sae.wsu.edu/ttc/'

        log_level = logging.DEBUG if getattr(self, 'debug', False) else logging.INFO
        self.logger = get_logger(__name__,
                                 self.log_file or 'stderr',
                                 log_level=log_level)

        self.engine = create_engine(
            'sqlite:///{}'.format(self.database_location),
            connect_args={'check_same_thread': False})

        Base.metadata.create_all(self.engine)
        self.Session = sessionmaker(bind=self.engine)
        self.session = self.Session()

        self.visited_urls = set()
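The get_logger helper above is project-specific and not part of the snippet. A minimal standard-library sketch compatible with this particular call signature (name, destination, log_level) might look like the following; the 'stderr' sentinel is inferred from the call site, and other examples below use project variants with different signatures.

import logging
import sys

def get_logger(name, destination='stderr', log_level=logging.INFO):
    """Return a logger writing to a file path, or to stderr when the
    sentinel string 'stderr' is passed (as in the call above)."""
    logger = logging.getLogger(name)
    logger.setLevel(log_level)
    if not logger.handlers:  # avoid stacking handlers on repeated calls
        if destination == 'stderr':
            handler = logging.StreamHandler(sys.stderr)
        else:
            handler = logging.FileHandler(destination)
        handler.setFormatter(logging.Formatter(
            '%(asctime)s %(name)s %(levelname)s: %(message)s'))
        logger.addHandler(handler)
    return logger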
Example #2
    def __init__(self, args):
        args.cache_dir = os.path.join(
            "cache",
            '{}_{}shot_K{}_seed{}'.format(args.label, args.shot,
                                          args.steps_sib, args.seed_sib))
        args.log_dir = os.path.join(args.cache_dir, 'logs')
        args.out_dir = os.path.join(args.cache_dir, 'outputs')
        create_dirs([args.cache_dir, args.log_dir, args.out_dir])

        logger = get_logger(args.log_dir, args.label)
        set_random_seed(args.seed_sib)
        logger.info('Start experiment with random seed: {:d}'.format(
            args.seed_sib))
        logger.info(args)
        self.logger = logger

        (train_transform, val_transform, self.input_w, self.input_h,
         train_dir, val_dir, test_dir, episode_json, nb_cls) = dataset_setting(
             args.dataset, args.way)

        self.train_loader = BatchSampler(imgDir=train_dir,
                                         nClsEpisode=args.way,
                                         nSupport=args.shot,
                                         nQuery=args.train_query,
                                         transform=train_transform,
                                         useGPU=True,
                                         inputW=self.input_w,
                                         inputH=self.input_h,
                                         batchSize=args.batchsize_sib)
        self.test_loader = EpisodeSampler(imgDir=test_dir,
                                          nClsEpisode=args.way,
                                          nSupport=args.shot,
                                          nQuery=args.train_query,
                                          transform=val_transform,
                                          useGPU=True,
                                          inputW=self.input_w,
                                          inputH=self.input_h)
        self.val_loader = EpisodeSampler(imgDir=val_dir,
                                         nClsEpisode=args.way,
                                         nSupport=args.shot,
                                         nQuery=args.train_query,
                                         transform=val_transform,
                                         useGPU=True,
                                         inputW=self.input_w,
                                         inputH=self.input_h)
        self.args = args
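The create_dirs helper called near the top of this example is not shown. Assuming it only has to ensure each directory exists, an equivalent sketch is:

import os

def create_dirs(paths):
    # Hypothetical reimplementation: create every directory in the list,
    # ignoring any that already exist.
    for path in paths:
        os.makedirs(path, exist_ok=True)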
Example #3
def main(cfg):
    print(cfg)
    print()

    # setup logdir, writer and logger
    logdir = os.path.join(cfg['root'], cfg['testing']['logdir'])

    if not os.path.exists(logdir):
        os.makedirs(logdir)

    tester_name = cfg['evaluator']

    with open(os.path.join(logdir, tester_name + '.yml'), 'w') as fp:
        yaml.dump(cfg, fp)

    logger = get_logger(logdir)

    print('Tester ', tester_name, __file__)
    Tester = get_tester(tester_name)(cfg, logdir, logger)
    print()

    # start testing
    Tester.test()
Example #4
def main(cfg):
    print(cfg)
    print()

    # setup logdir, writer and logger
    logdir = os.path.join(cfg['root'], cfg['logdir'])

    if not os.path.exists(logdir):
        os.makedirs(logdir)

    writer = SummaryWriter(log_dir=logdir)

    trainer_name = cfg['trainer']

    with open(os.path.join(logdir, trainer_name + '.yml'), 'w') as fp:
        yaml.dump(cfg, fp)

    logger = get_logger(logdir)

    Trainer = get_trainer(trainer_name)(cfg, writer, logger)
    print()

    # start training
    Trainer.train()
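The two main functions above resolve a class by name through get_tester and get_trainer. Neither registry is shown in the snippets; a common pattern they may follow is a plain name-to-class mapping (the decorator and dict below are illustrative, not from the source):

# Hypothetical registry; real trainer names and classes are not shown above.
_TRAINERS = {}

def register_trainer(name):
    def decorator(cls):
        _TRAINERS[name] = cls
        return cls
    return decorator

def get_trainer(name):
    try:
        return _TRAINERS[name]
    except KeyError:
        raise KeyError('Unknown trainer: {}'.format(name))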
Example #5
import os
import argparse
import logging
from io import BytesIO
from utils.misc import get_logger, extract_text_from_unsearchable_pdf
logger = get_logger(__name__)
logger.setLevel(logging.INFO)


def process_file(file_path):
    """
    Extract the text from a given file
    :param file_path: str absolute file path
    :return: str
    """
    with open(file_path, 'rb') as file_in:
        pdf_byte_content = BytesIO(file_in.read())
        corpus = extract_text_from_unsearchable_pdf(pdf_byte_content)
        return corpus


def main(dir_path):
    """
    Process all the PDF files in a given dir and write
    their content to .txt files, inheriting the filenames.
    :param dir_path: str input directory
    :return: None
    """
    logger.info("Reading from {0}".format(dir_path))
    for filename in os.listdir(dir_path):
        if filename.endswith('.pdf'):
            # Assumed continuation: write the extracted text next to the
            # source PDF, inheriting its filename (per the docstring).
            corpus = process_file(os.path.join(dir_path, filename))
            txt_name = filename[:-len('.pdf')] + '.txt'
            with open(os.path.join(dir_path, txt_name), 'w') as file_out:
                file_out.write(corpus)
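The module imports argparse, but the excerpt ends before any entry point. A minimal, hypothetical CLI wrapper consistent with main(dir_path) could be:

if __name__ == '__main__':
    # Hypothetical entry point; the argument name is an assumption.
    parser = argparse.ArgumentParser(description='Extract text from PDFs')
    parser.add_argument('--dir-path', required=True,
                        help='directory containing the PDF files')
    cli_args = parser.parse_args()
    main(cli_args.dir_path)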
Example #6
#   fixed length embeddings for each segment so the processing can continue in a more
#   straightforward way. The class "tensorflow_code.nn_def.tf_pool" requires an input feature
#   variable, a corresponding indices variable and a list of NN components that the features
#   should be sent through, as well as a pooling function as input. See the code for details.

# Todo:
# * Fix so that variable duration segments can be used for reconstruction dev. loss
# * Fix so that segment for embedding and reconstruction loss never overlaps? Now both of them are randomly selected.
#

floatX = 'float32'

import sys, os, cPickle, copy, subprocess, time, inspect, re  #, h5py

from utils.misc import get_logger
log = get_logger()

from tensorflow_code import pool_fkns

import numpy as np
import utils.mbatch_generation
from utils.load_data import *
from utils.model_utils import load_davids_kaldi_model
import tensorflow_code.optim
import tensorflow_code.models
import tensorflow as tf
from utils.train import train_nn, get_train_batch_fkn
from tensorflow_code.dplda import mBW_2_PQck
from tensorflow_code.dplda import p_eff, llrThreshold, labMat2weight, lab2matrix
from tensorflow_code.load_save import save_tf_model
from utils.evaluation import get_eval_info
Example #7
from utils.misc import get_logger
from config import main_conf as mc
from config import ranking_conf as rc
import pandas as pd
import logging

rank_log = get_logger(__name__)
rank_log.setLevel(logging.INFO)


def rank_exe(df, pred_column):
    lsp = 0
    i = 0
    df_daily_profit = pd.DataFrame(columns=[rc["rank_df"]])
    dfs_long = []
    dfs_short = []
    df_rank = df.sort_values([mc["date_clm"], pred_column],
                             ascending=[True, False])
    date_list = list(df_rank.Date.unique())
    date_list.sort()
    for date in date_list[:-1]:
        i += 1
        data_date = df_rank[df_rank.Date == date]
        data_date = data_date.drop_duplicates(subset=[mc["ticker"]])  # is that useful?
        long_part = data_date.iloc[:rc["stocks_number"]]
        short_part = data_date.iloc[-rc["stocks_number"]:]
        # print("long: ", pred_column, "\n", long_part[["Date", "ticker", pred_column, mc["label"]]].head(5))
        # print("short: ", pred_column, "\n", short_part[["Date", "ticker", pred_column, mc["label"]]].head(5))
        dp, lsp = long_short_profit(long_part, short_part, lsp)
        df_daily_profit.loc[i] = [date, lsp, dp]
Example #8
main_path = args.exp_dir

folders = ["event", "model", "log", "param"]
if args.setup in ['single']:
    folders.append('decoding')

for name in folders:
    folder = "{}/{}/".format(name, args.experiment) if hasattr(args, "experiment") else name + '/'
    args.__dict__["{}_path".format(name)] = os.path.join(args.exp_dir, folder)
    Path(args.__dict__["{}_path".format(name)]).mkdir(parents=True, exist_ok=True)

if not hasattr(args, 'hp_str'):
    args.hp_str = get_hp_str(args)
    args.prefix = strftime("%m.%d_%H.%M.", localtime())
    args.id_str = args.prefix + "_" + args.hp_str
logger = get_logger(args)
set_seed(args)

# Save config
args.save(str(args.param_path + args.id_str))

# Data
train_it, dev_it = get_data(args)

args.__dict__.update({'logger': logger})
args.logger.info(args)
args.logger.info('Starting with HPARAMS: {}'.format(args.hp_str))

# Model
model = get_model(args)
extra_input = {}
Example #9
import matplotlib.pyplot as plt
import pandas as pd
from utils.misc import get_logger
import logging
import os

plotter_log = get_logger(__name__)
plotter_log.setLevel(logging.INFO)


class plotter(object):
    def __init__(self, dataframe, train, test, out_path):
        self.dataframe = dataframe
        self.train = train
        self.test = test
        self.out_path = out_path

    def plot_clop(self):
        """
        Plot "Close - Open" variable
        """
        y = self.dataframe.Clop
        y = pd.Series(y)
        fig = plt.figure()
        y.plot(figsize=(22, 8), color='teal')
        plt.title('Clop variable')
        figname = os.path.join(self.out_path, "clop.png")
        plt.savefig(figname, dpi=200)
        plt.close()

    def plot_actual_day_rt(self):
Example #10
from config import main_conf as mc
import logging
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from ta.momentum import ROCIndicator, WilliamsRIndicator, RSIIndicator, stochrsi_k
from ta.trend import EMAIndicator, MACD
from ta.volume import AccDistIndexIndicator
from utils.misc import get_logger

seldf_log = get_logger(__name__)
seldf_log.setLevel(logging.INFO)


def preprocess_df(df, csv_string):
    """
    Creates a unique list of dfs by reading
    csv files and selects the list of features by
    looking at the config file.
    :param df: pandas df
    :param csv_string: stock name string
    """
    df = df[mc["columns"]]
    df = create_ticker(df, csv_string)
    # lagged features
    df = lagged_daily_price_return(df)
    df = create_next_day_return(df)
    df = lagged_daily_indicators(df)
    # Indicators
    df = create_tech_indicators(df)
    scaled_df = scale_df(df)
    return df, scaled_df
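scale_df is not shown in this excerpt. Given the MinMaxScaler import at the top, a plausible sketch (scaling every numeric column into [0, 1] is an assumption) is:

def scale_df(df):
    # Hypothetical implementation of the helper called above: min-max
    # scale the numeric columns; non-numeric columns are left untouched.
    scaled = df.copy()
    numeric_cols = scaled.select_dtypes(include=np.number).columns
    scaled[numeric_cols] = MinMaxScaler().fit_transform(scaled[numeric_cols])
    return scaled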
Example #11
import logging
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor
from sklearn.linear_model import LinearRegression, Lasso, ElasticNet, LogisticRegression # RidgeCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.svm import LinearSVR, SVR
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from utils.misc import get_logger
from config import regress_models_conf as rmc
from sklearn.model_selection import TimeSeriesSplit, cross_val_predict, GridSearchCV
from mlxtend.regressor import StackingRegressor as StackingRegresorMLX
from itertools import combinations

model_log = get_logger(__name__)
model_log.setLevel(logging.INFO)


class RegressorModels(object):
    def __init__(self, x_train, y_train, x_test):
        self.x_train = x_train
        self.y_train = y_train
        self.x_test = x_test

    def random_forest_regr(self):
        """
        Random Forest fit
        :return: prediction
        """
        model = RandomForestRegressor(random_state=123)
        model.fit(self.x_train, self.y_train)
        # Assumed: return predictions on the held-out x_test, per the docstring.
        return model.predict(self.x_test)
Example #12
import matplotlib.pyplot as plt
from utils.misc import get_logger
import os
import logging

plot_log = get_logger(__name__)
plot_log.setLevel(logging.INFO)

def linear_regression_plot(y_test, train_pred, out_path):
    plot_log.info("in linear regression plot")
    plt.plot(y_test.values, color='r', linewidth=1, label='test')
    plt.plot(train_pred, '-.', color="b", linewidth=1, label='pred')
    plt.ylabel('\u0394 Rel')
    plt.legend(loc='upper center', frameon=False)
    plt.title('LinearRegression')
    figname = os.path.join(out_path, "linearRegr_plot"+".png")
    plt.savefig(figname, dpi=200)
    plt.close()

def linear_regression_scatter(y_test, train_pred, out_path):
    plot_log.info("in linear regression scatter")
    plt.scatter(y_test.values, train_pred)
    plt.xlabel('\u0394 Rel')
    plt.ylabel('\u0394 Rel predicted')
    plt.title('\u0394 Rel comparison')
    plt.xlim(-0.2, 0.2)
    plt.ylim(-0.2, 0.2)
    figname = os.path.join(out_path, "linearRegr_scatter"+".png")
    plt.savefig(figname, dpi=200)
    plt.close()
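A hypothetical call with synthetic data, to show the expected argument types (y_test as a pandas Series, train_pred as an array of the same length):

import numpy as np
import pandas as pd

if __name__ == '__main__':
    # Synthetic demo data; real callers would pass model outputs.
    y_test = pd.Series(np.random.uniform(-0.1, 0.1, size=100))
    train_pred = y_test.values + np.random.normal(0, 0.02, size=100)
    linear_regression_plot(y_test, train_pred, out_path='.')
    linear_regression_scatter(y_test, train_pred, out_path='.')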