def prepare(self):
    self.base_url = 'http://sae.wsu.edu/ttc/'
    log_level = logging.DEBUG if getattr(self, 'debug', False) else logging.INFO
    self.logger = get_logger(__name__, self.log_file or 'stderr', log_level=log_level)
    self.engine = create_engine(
        'sqlite:///{}'.format(self.database_location),
        connect_args={'check_same_thread': False})
    Base.metadata.create_all(self.engine)
    self.Session = sessionmaker(bind=self.engine)
    self.session = self.Session()
    self.visited_urls = set()
def __init__(self, args):
    args.cache_dir = os.path.join(
        "cache", '{}_{}shot_K{}_seed{}'.format(
            args.label, args.shot, args.steps_sib, args.seed_sib))
    args.log_dir = os.path.join(args.cache_dir, 'logs')
    args.out_dir = os.path.join(args.cache_dir, 'outputs')
    create_dirs([args.cache_dir, args.log_dir, args.out_dir])

    logger = get_logger(args.log_dir, args.label)
    set_random_seed(args.seed_sib)
    logger.info('Start experiment with random seed: {:d}'.format(args.seed_sib))
    logger.info(args)
    self.logger = logger

    (train_transform, val_transform, self.input_w, self.input_h,
     train_dir, val_dir, test_dir, episode_json, nb_cls) = dataset_setting(
        args.dataset, args.way)

    self.train_loader = BatchSampler(imgDir=train_dir,
                                     nClsEpisode=args.way,
                                     nSupport=args.shot,
                                     nQuery=args.train_query,
                                     transform=train_transform,
                                     useGPU=True,
                                     inputW=self.input_w,
                                     inputH=self.input_h,
                                     batchSize=args.batchsize_sib)
    self.test_loader = EpisodeSampler(imgDir=test_dir,
                                      nClsEpisode=args.way,
                                      nSupport=args.shot,
                                      nQuery=args.train_query,
                                      transform=val_transform,
                                      useGPU=True,
                                      inputW=self.input_w,
                                      inputH=self.input_h)
    self.val_loader = EpisodeSampler(imgDir=val_dir,
                                     nClsEpisode=args.way,
                                     nSupport=args.shot,
                                     nQuery=args.train_query,
                                     transform=val_transform,
                                     useGPU=True,
                                     inputW=self.input_w,
                                     inputH=self.input_h)
    self.args = args
def main(cfg):
    print(cfg)
    print()

    # set up logdir and logger
    logdir = os.path.join(cfg['root'], cfg['testing']['logdir'])
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    tester_name = cfg['evaluator']
    with open(os.path.join(logdir, tester_name + '.yml'), 'w') as fp:
        yaml.dump(cfg, fp)
    logger = get_logger(logdir)
    print('Tester ', tester_name, __file__)
    Tester = get_tester(tester_name)(cfg, logdir, logger)
    print()

    # start testing
    Tester.test()
def main(cfg):
    print(cfg)
    print()

    # set up logdir, writer and logger
    logdir = os.path.join(cfg['root'], cfg['logdir'])
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    writer = SummaryWriter(log_dir=logdir)
    trainer_name = cfg['trainer']
    with open(os.path.join(logdir, trainer_name + '.yml'), 'w') as fp:
        yaml.dump(cfg, fp)
    logger = get_logger(logdir)
    Trainer = get_trainer(trainer_name)(cfg, writer, logger)
    print()

    # start training
    Trainer.train()
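# A minimal sketch of a cfg dict for main() above. The keys mirror the lookups
# inside the function, but every value here is a made-up example and the trainer
# name is hypothetical -- a real config is loaded from YAML and also carries the
# trainer's own settings.
if __name__ == '__main__':
    cfg = {
        'root': 'runs',
        'logdir': 'experiment_1',
        'trainer': 'default_trainer',  # hypothetical name registered with get_trainer
    }
    main(cfg)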
import os
import argparse
import logging
from io import BytesIO

from utils.misc import get_logger, extract_text_from_unsearchable_pdf

logger = get_logger(__name__)
logger.setLevel(logging.INFO)


def process_file(file_path):
    """
    Extract the text from a given file.
    :param file_path: str absolute file path
    :return: str
    """
    with open(file_path, 'rb') as file_in:
        pdf_byte_content = BytesIO(file_in.read())
    corpus = extract_text_from_unsearchable_pdf(pdf_byte_content)
    return corpus


def main(dir_path):
    """
    Process all the PDF files in a given dir and write their content
    in .txt files, inheriting the filenames.
    :param dir_path: str input directory
    :return: None
    """
    logger.info("Reading from {0}".format(dir_path))
    for filename in os.listdir(dir_path):
        if filename.endswith('.pdf'):
            corpus = process_file(os.path.join(dir_path, filename))
            txt_name = os.path.splitext(filename)[0] + '.txt'
            # the .txt is written alongside the source PDF (assumed output location)
            with open(os.path.join(dir_path, txt_name), 'w') as file_out:
                file_out.write(corpus)
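# A small CLI entry point for the module above; argparse is already imported but
# unused, so this wiring (including the positional argument name) is an assumed
# sketch rather than part of the original file.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Extract text from all PDFs in a directory.')
    parser.add_argument('dir_path', help='directory containing .pdf files')
    cli_args = parser.parse_args()
    main(cli_args.dir_path)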
# fixed-length embeddings for each segment so the processing can continue in a more
# straightforward way. The class "tensorflow_code.nn_def.tf_pool" requires an input
# feature variable, a corresponding indices variable and a list of NN components that
# the features should be sent through, as well as a pooling function as input. See the
# code for details.
# Todo:
#  * Fix so that variable duration segments can be used for reconstruction dev. loss
#  * Fix so that segments for embedding and reconstruction loss never overlap? Now both
#    of them are randomly selected.
#

floatX = 'float32'

import sys, os, cPickle, copy, subprocess, time, inspect, re  # , h5py

from utils.misc import get_logger
log = get_logger()

from tensorflow_code import pool_fkns
import numpy as np
import utils.mbatch_generation
from utils.load_data import *
from utils.model_utils import load_davids_kaldi_model
import tensorflow_code.optim
import tensorflow_code.models
import tensorflow as tf
from utils.train import train_nn, get_train_batch_fkn
from tensorflow_code.dplda import mBW_2_PQck
from tensorflow_code.dplda import p_eff, llrThreshold, labMat2weight, lab2matrix
from tensorflow_code.load_save import save_tf_model
from utils.evaluation import get_eval_info
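# A hedged sketch of how the tf_pool interface described above might be wired up.
# The argument names (feats, idx, nn_components, pooling function) are assumptions
# made for illustration only; the actual signature lives in tensorflow_code/nn_def.py.
#
#   emb = tf_pool(feats,                      # frame-level input feature variable
#                 idx,                        # per-segment indices into feats
#                 [tdnn_layer, dense_layer],  # NN components the features pass through
#                 pool_fkns.mean_std)         # pooling function applied per segment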
import logging

import pandas as pd

from utils.misc import get_logger
from config import main_conf as mc
from config import ranking_conf as rc

rank_log = get_logger(__name__)
rank_log.setLevel(logging.INFO)


def rank_exe(df, pred_column):
    lsp = 0
    i = 0
    # rc["rank_df"] is assumed to hold the three output column names
    df_daily_profit = pd.DataFrame(columns=rc["rank_df"])
    dfs_long = []
    dfs_short = []
    df_rank = df.sort_values([mc["date_clm"], pred_column], ascending=[True, False])
    date_list = list(df_rank.Date.unique())
    date_list.sort()
    for date in date_list[:-1]:
        i += 1
        data_date = df_rank[df_rank.Date == date]
        data_date = data_date.drop_duplicates(subset=[mc["ticker"]])  # is that useful?
        long_part = data_date.iloc[:rc["stocks_number"]]
        short_part = data_date.iloc[-rc["stocks_number"]:]
        # print("long: ", pred_column, "\n", long_part[["Date", "ticker", pred_column, mc["label"]]].head(5))
        # print("short: ", pred_column, "\n", short_part[["Date", "ticker", pred_column, mc["label"]]].head(5))
        dp, lsp = long_short_profit(long_part, short_part, lsp)
        df_daily_profit.loc[i] = [date, lsp, dp]
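# long_short_profit is called above but not shown. A plausible sketch, assuming
# mc["label"] holds the realized next-day return; this illustrates the long-short
# bookkeeping only and is not the project's actual implementation.
def long_short_profit(long_part, short_part, lsp):
    # profit of the day: long the top-ranked stocks, short the bottom-ranked ones
    dp = long_part[mc["label"]].mean() - short_part[mc["label"]].mean()
    lsp += dp  # running cumulative long-short profit
    return dp, lsp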
main_path = args.exp_dir
folders = ["event", "model", "log", "param"]
if args.setup in ['single']:
    folders.append('decoding')
for name in folders:
    folder = "{}/{}/".format(name, args.experiment) if hasattr(args, "experiment") else name + '/'
    args.__dict__["{}_path".format(name)] = os.path.join(args.exp_dir, folder)
    Path(args.__dict__["{}_path".format(name)]).mkdir(parents=True, exist_ok=True)

if not hasattr(args, 'hp_str'):
    args.hp_str = get_hp_str(args)
args.prefix = strftime("%m.%d_%H.%M.", localtime())
args.id_str = args.prefix + "_" + args.hp_str

logger = get_logger(args)
set_seed(args)

# Save config
args.save(str(args.param_path + args.id_str))

# Data
train_it, dev_it = get_data(args)

args.__dict__.update({'logger': logger})
args.logger.info(args)
args.logger.info('Starting with HPARAMS: {}'.format(args.hp_str))

# Model
model = get_model(args)
extra_input = {}
import logging
import os

import matplotlib.pyplot as plt
import pandas as pd

from utils.misc import get_logger

plotter_log = get_logger(__name__)
plotter_log.setLevel(logging.INFO)


class plotter(object):
    def __init__(self, dataframe, train, test, out_path):
        self.dataframe = dataframe
        self.train = train
        self.test = test
        self.out_path = out_path

    def plot_clop(self):
        """
        Plot the "Close - Open" variable.
        """
        y = self.dataframe.Clop
        y = pd.Series(y)
        fig = plt.figure()
        y.plot(figsize=(22, 8), color='teal')
        plt.title('Clop variable')
        figname = os.path.join(self.out_path, "clop" + ".png")
        plt.savefig(figname, dpi=200)
        plt.close()

    def plot_actual_day_rt(self):
import logging

import numpy as np
from sklearn.preprocessing import MinMaxScaler
from ta.momentum import ROCIndicator, WilliamsRIndicator, RSIIndicator, stochrsi_k
from ta.trend import EMAIndicator, MACD
from ta.volume import AccDistIndexIndicator

from config import main_conf as mc
from utils.misc import get_logger

seldf_log = get_logger(__name__)
seldf_log.setLevel(logging.INFO)


def preprocess_df(df, csv_string):
    """
    Preprocess a single stock df: select the feature columns listed in the
    config file, add lagged features and technical indicators, and return
    both the raw and the scaled frame.
    :param df: pandas df
    :param csv_string: stock name string
    :return: (df, scaled_df) tuple
    """
    df = df[mc["columns"]]
    df = create_ticker(df, csv_string)
    # lagged features
    df = lagged_daily_price_return(df)
    df = create_next_day_return(df)
    df = lagged_daily_indicators(df)
    # indicators
    df = create_tech_indicators(df)
    scaled_df = scale_df(df)
    return df, scaled_df
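# A hedged usage sketch for preprocess_df above; the csv path and ticker are
# made-up examples, and pandas is imported here only for the illustration.
import pandas as pd

raw_df = pd.read_csv('data/AAPL.csv')          # hypothetical input file
df, scaled_df = preprocess_df(raw_df, 'AAPL')  # raw frame plus the scale_df output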
import logging
from itertools import combinations

import numpy as np
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression, Lasso, ElasticNet, LogisticRegression  # RidgeCV
from sklearn.model_selection import TimeSeriesSplit, cross_val_predict, GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import LinearSVR, SVR
from sklearn.tree import DecisionTreeRegressor
from mlxtend.regressor import StackingRegressor as StackingRegresorMLX

from config import regress_models_conf as rmc
from utils.misc import get_logger

model_log = get_logger(__name__)
model_log.setLevel(logging.INFO)


class RegressorModels(object):
    def __init__(self, x_train, y_train, x_test):
        self.x_train = x_train
        self.y_train = y_train
        self.x_test = x_test

    def random_forest_regr(self):
        """
        Random Forest fit.
        :return: prediction
        """
        model = RandomForestRegressor(random_state=123)
        model.fit(self.x_train, self.y_train)
        # return the prediction on the held-out test set, as the docstring promises
        return model.predict(self.x_test)
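# A minimal usage sketch for RegressorModels with synthetic data; shapes and
# values are random and purely illustrative.
x_train = np.random.rand(100, 5)
y_train = np.random.rand(100)
x_test = np.random.rand(20, 5)
preds = RegressorModels(x_train, y_train, x_test).random_forest_regr()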
import logging
import os

import matplotlib.pyplot as plt

from utils.misc import get_logger

plot_log = get_logger(__name__)
plot_log.setLevel(logging.INFO)


def linear_regression_plot(y_test, train_pred, out_path):
    plot_log.info("in linear regression plot")
    plt.plot(y_test.values, color='r', linewidth=1, label='test')
    plt.plot(train_pred, '-.', color="b", linewidth=1, label='pred')
    plt.ylabel('\u0394 Rel')
    plt.legend(loc='upper center', frameon=False)
    plt.title('LinearRegression')
    figname = os.path.join(out_path, "linearRegr_plot" + ".png")
    plt.savefig(figname, dpi=200)
    plt.close()


def linear_regression_scatter(y_test, train_pred, out_path):
    plot_log.info("in linear regression scatter")
    plt.scatter(y_test.values, train_pred)
    plt.xlabel('\u0394 Rel')
    plt.ylabel('\u0394 Rel predicted')
    plt.title('\u0394 Rel comparison')
    plt.xlim(-0.2, 0.2)
    plt.ylim(-0.2, 0.2)
    figname = os.path.join(out_path, "linearRegr_scatter" + ".png")
    plt.savefig(figname, dpi=200)
    plt.close()