def create(self, request, operation):
    """
    Create a component for a particular experiment
    ---
    request_serializer: ComponentSerializer
    """
    data = json.loads(JSONRenderer().render(request.DATA))
    op = None  # TODO: [refactor] This value is probably not needed
    exp_id = int(data["experiment"])
    # TODO: [required] this statement should be surrounded by try-except
    exp = Experiment.objects.get(pk=exp_id)
    print("Experiment", exp_id, "Operation", operation)
    op = self.set_operation(operation, data)
    component = Component(experiment=exp,
                          created_time=datetime.now(),
                          modified_time=datetime.now(),
                          operation_type=op)
    component.save()
    serializer = ComponentSerializer(component)
    return send_response("GET", serializer)
def update_results_function(id, model_name, city, feature_type,
                            model_parameters, model_results, mse):
    """Update results df / pickle with info from experiment"""
    results_file = Path(f'{PICKLE_PATH}/results.pkl')

    # Make run id (note: unused below; rows are keyed on the `id` argument)
    RUN_ID = 'RUN-' + str(get_length_results() + 1)

    # Create / read results df from pickle
    if not results_file.exists():
        columns = [
            'RUN_ID', 'DATETIME', 'MODEL_NAME', 'CITY', 'FEATURE_TYPE',
            'HOST_MACHINE', 'MODEL_PARAMETERS', 'MODEL_RESULTS',
            'MEAN_SQUARED_ERROR'
        ]
        results = pd.DataFrame(columns=columns)
    else:
        results = pd.read_pickle(results_file)

    # If run exists, update in place (assign via .loc so the change reaches
    # `results` itself rather than a temporary copy)
    mask = results['RUN_ID'] == id
    if mask.any():
        results.loc[mask, 'DATETIME'] = datetime.now()
        results.loc[mask, 'MODEL_NAME'] = model_name
        results.loc[mask, 'CITY'] = city
        results.loc[mask, 'FEATURE_TYPE'] = feature_type
        results.loc[mask, 'HOST_MACHINE'] = socket.gethostname()
        results.loc[mask, 'MODEL_PARAMETERS'] = model_parameters
        results.loc[mask, 'MODEL_RESULTS'] = model_results
        results.loc[mask, 'MEAN_SQUARED_ERROR'] = mse
    else:
        new_row = pd.DataFrame([{
            'RUN_ID': id,
            'DATETIME': datetime.now(),
            'MODEL_NAME': model_name,
            'CITY': city,
            'FEATURE_TYPE': feature_type,
            'HOST_MACHINE': socket.gethostname(),
            'MODEL_PARAMETERS': model_parameters,
            'MODEL_RESULTS': model_results,
            'MEAN_SQUARED_ERROR': mse,
        }])
        results = pd.concat([results, new_row], ignore_index=True)

    # Save pickle
    results.to_pickle(results_file)
    return results
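# Usage sketch for update_results_function (illustrative, not from the original
# source): assumes PICKLE_PATH and get_length_results() are defined as in this
# module, and every argument value below is a placeholder.
results = update_results_function(
    id='RUN-1',
    model_name='arima',
    city='London',
    feature_type='raw',
    model_parameters={'order': (1, 1, 1)},
    model_results={'aic': 123.4},
    mse=0.42,
)
print(results.tail(1))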
def get_historical(quote):
    end = datetime.now()
    start = datetime(end.year - 2, end.month, end.day)
    data = yf.download(quote, start=start, end=end)
    df = pd.DataFrame(data=data)
    df.to_csv(quote + '.csv')
    if df.empty:
        from alpha_vantage.timeseries import TimeSeries
        ts = TimeSeries(key='N6A6QT6IBFJOPJ70', output_format='pandas')
        data, meta_data = ts.get_daily_adjusted(symbol='NSE:' + quote,
                                                outputsize='full')
        # Format df: keep roughly the last 2 years of rows, flipped into
        # ascending date order (::-1)
        data = data.head(503).iloc[::-1]
        data = data.reset_index()
        # Keep required cols only
        df = pd.DataFrame()
        df['Date'] = data['date']
        df['Open'] = data['1. open']
        df['High'] = data['2. high']
        df['Low'] = data['3. low']
        df['Close'] = data['4. close']
        df['Adj Close'] = data['5. adjusted close']
        df['Volume'] = data['6. volume']
        df.to_csv(quote + '.csv', index=False)
    return
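# Usage sketch (illustrative, not from the original source): download roughly
# two years of daily prices and write them to '<ticker>.csv'; 'AAPL' is only an
# example symbol.
get_historical('AAPL')  # falls back to Alpha Vantage when yfinance returns nothing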
def remove_expired_session_results(self):
    """
    We store result sets in the session variable for access from the stay
    detail view. They are valid for 30 minutes, after which they are purged
    by a scheduled Heroku call to this command.
    """
    maximum_result_age_in_seconds = settings.MAXIMUM_RESULT_AGE_IN_SECONDS
    session_keys = Session.objects.all().values_list('session_key', flat=True)
    for session_key in session_keys:
        session = SessionStore(session_key=session_key)
        # Each user session may have multiple associated search keys
        for key in list(session.keys()):
            item = session[key]
            # Identify keys associated with search results
            if not isinstance(item, dict):
                continue
            if sorted(item.keys()) == ['stays', 'timestamp']:
                timestamp = datetime.strptime(item['timestamp'],
                                              '%Y-%m-%dT%H:%M:%S')
                age_in_seconds = (datetime.now() - timestamp).total_seconds()
                if age_in_seconds > maximum_result_age_in_seconds:
                    del session[key]
                    session.save()
def get_stock_data(stock_symbol):
    end = datetime.now()
    start = datetime(end.year - 2, end.month, end.day)
    data = yf.download(stock_symbol, start=start, end=end)
    df = pd.DataFrame(data=data)
    df.to_csv(stock_symbol + '.csv')
    return
def pdf_OCR(pdf_file_path):
    # Render each PDF page to a 500-dpi image
    pages = convert_from_path(pdf_file_path, 500)
    image_counter = 1
    now = datetime.now()
    for page in pages:
        filename = ("/home/amit_bahir/Desktop/deidentification/reports/page_"
                    + now.strftime("%d-%m-%Y_%H:%M:%S:%f_")
                    + str(image_counter) + ".jpg")
        page.save(filename, 'JPEG')
        image_counter += 1
    filelimit = image_counter - 1
    txt_file_path = ("static/" + "to_be_deidentified"
                     + now.strftime("%d-%m-%Y_%H:%M:%S:%f") + ".txt")
    with open(txt_file_path, "a") as f:
        # OCR each page image and append the text to a single output file
        for i in range(1, filelimit + 1):
            filename = ("/home/amit_bahir/Desktop/deidentification/reports/page_"
                        + now.strftime("%d-%m-%Y_%H:%M:%S:%f_") + str(i) + ".jpg")
            text = str(pytesseract.image_to_string(Image.open(filename)))
            text = text.replace('-\n', '')
            f.write(text)
    return txt_file_path
def deidentification():
    if request.method == 'POST':
        request_data = request.get_json()
        string = (request_data['description'] + ".\n"
                  + request_data['medicine'] + ".\n"
                  + request_data['injection'] + ".\n"
                  + request_data['labReport'] + ".\n")
        deidentified_string = EHR_data_extractor(string)
        deidentified_list = deidentified_string.splitlines()

        # Deidentify attached files, dispatching on the file extension
        deidentified_files = []
        for file in request_data['files']:
            identifier = file[-3:]
            if identifier in ("png", "jpg"):
                deid_img_path = img_deidentifier(file)
                deid_img_path = "http://192.168.43.49:5000/" + deid_img_path
                deidentified_files.append(deid_img_path)
            elif identifier == "pdf":
                deid_pdf_path = pdf_deidentifier(file)
                deid_pdf_path = "http://192.168.43.49:5000/" + deid_pdf_path
                deidentified_files.append(deid_pdf_path)
            elif identifier == "txt":
                with open(file, "r") as f:
                    input_string = f.read()
                processed_string = EHR_data_extractor(input_string)
                now = datetime.now()
                deid_text_path = ("static/" + "deidentified_"
                                  + now.strftime("%d-%m-%Y %H:%M:%S:%f") + ".txt")
                with open(deid_text_path, "a") as f:
                    f.write(processed_string)
                deidentified_files.append(deid_text_path)

        # Now deidentify the individual fields
        deidentified_data = {
            'description': deidentified_list[0],
            'medicine': deidentified_list[1],
            'injection': deidentified_list[2],
            'labReport': deidentified_list[3],
            'files': deidentified_files
        }
        return jsonify(deidentified_data)
    return jsonify({"message": "Didn't perform de_identification"})
def getReportDateList():
    year_now = datetime.now().year
    year_start = int(start_date[0:4])
    date_list = []
    while year_start < year_now:
        date = str(year_start) + '-12-31'
        year_start += 1
        date_list.append(date)
    return date_list
def get_subject_line():
    date = datetime.now()
    formatted_date = str(date.month) + '/' + str(date.day) + '/' + str(date.year)
    formatted_time = str(date.hour) + ':' + str(date.minute) + ':' + str(date.second)
    formatted_datetime = ' - '.join([formatted_date, formatted_time])
    return ''.join([SUBJECT_PREFIX, formatted_datetime])
def get_contract(cls, exchange_, contract_, date_=None):
    if date_ is None:
        date_ = datetime.now().date()
    contract_list = select([contract_info.c.contract_symbol]).where(
        and_(contract_info.c.exchange_symbol == exchange_,
             contract_info.c.underlying_symbol == contract_,
             contract_info.c.expiration > date_)).execute().fetchall()
    return [s[0] for s in contract_list]
def getReportQuartList():
    year_now = datetime.now().year
    month_now = datetime.now().month
    year_start = int(start_date[0:4])
    date_list = []
    while year_start < year_now:
        date_4 = str(year_start) + '-12-31'
        date_3 = str(year_start) + '-09-30'
        date_2 = str(year_start) + '-06-30'
        date_1 = str(year_start) + '-03-31'
        year_start += 1
        date_list.append(date_1)
        date_list.append(date_2)
        date_list.append(date_3)
        date_list.append(date_4)
    if month_now > 4:
        date_list.append(str(year_now) + '-03-31')
    if month_now > 7:
        date_list.append(str(year_now) + '-06-30')
    if month_now > 10:
        date_list.append(str(year_now) + '-09-30')
    return date_list
def evaluate_models(dataset, p_values, d_values, q_values):
    """Grid-search ARIMA (p, d, q) orders, tracking the best MSE."""
    dataset = dataset.astype('float32')
    best_score, best_cfg = float("inf"), None
    start_time = datetime.now()
    for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p, d, q)
                try:
                    mse = evaluate_arima_model(dataset, order)
                    if mse < best_score:
                        best_score, best_cfg = mse, order
                    str_elapsed = "elapsed time: {0}".format(datetime.now() - start_time)
                    print("ARIMA{0} MSE={1:.3f} {2}".format(order, mse, str_elapsed))
                except Exception:
                    # Some orders fail to converge; skip them
                    continue
    print("Best ARIMA%s MSE=%.3f\n" % (best_cfg, best_score))
    return
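# Usage sketch for the grid search (illustrative, not from the original
# source): the (p, d, q) ranges below are a common small ARIMA search space,
# and `series` stands in for any 1-D array of observations; it relies on the
# evaluate_arima_model helper assumed by the function above.
series = np.array([112, 118, 132, 129, 121, 135, 148, 148, 136, 119], dtype=float)
evaluate_models(series, p_values=range(0, 3), d_values=range(0, 2), q_values=range(0, 3))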
def url_to_pdf(url):
    response = requests.get(url, stream=True)
    now = datetime.now()
    pdf_file_path = "static/" + now.strftime("%d-%m-%Y %H:%M:%S:%f") + ".pdf"
    with open(pdf_file_path, 'wb') as f:
        f.write(response.content)
    return pdf_file_path
def form_results():
    """
    Forms folders for each run to store the tensorboard files and saved models.
    """
    folder_name = "/{0}_{1}".format(datetime.now().strftime("%Y%m%d%H%M%S"), exptitle)
    tensorboard_path = results_path + folder_name + '/Tensorboard'
    saved_model_path = results_path + folder_name + '/Saved_models/'
    print(results_path + folder_name)
    if not os.path.exists(results_path + folder_name):
        os.makedirs(results_path + folder_name)
        os.makedirs(tensorboard_path)
        os.makedirs(saved_model_path)
    return tensorboard_path, saved_model_path
def forecast(id, cc, cfd_opt):
    cfds_raw = query('queries/cfds.sql', cfd_opt)
    indexes_raw = query('queries/indexes.sql')
    cfds = load_indicators(cfds_raw)
    indexes = load_indicators(indexes_raw)
    df = concat([cfds, indexes], axis=1, join='outer')

    # Remove dates when STO is closed, up until yesterday since that is the
    # last day from which we have all data. Get 5 periods so we have something
    # to interpolate between in case of missing values due to e.g. US market close.
    index_range = bdate_range(
        end=datetime.now().date() - timedelta(1),
        periods=5,
        freq='C',
        holidays=get_trading_close_holidays(cc)
    )
    print(index_range)
    df = df.reindex(index_range)

    # After interpolation we only need one lag
    df = df.interpolate(limit_direction='both')
    df = df.tail(1)
    print(df)

    # Normalize values
    scaler = joblib.load('outputs/%s-scaler.save' % id)
    df = scaler.transform(df)

    # Extract wanted features
    features_df = read_csv('outputs/%s-features.csv' % id, header=None)
    features_idx = features_df[1].values.flatten()
    sample = df[:, features_idx]
    print(features_df[0].values)
    print(sample)

    # Forecast
    model = load_model('outputs/%s-model.h5' % id)
    pred_probs = model.predict(sample)[0]
    pred_prob = np.amax(pred_probs)
    pred_idx = np.argmax(pred_probs, axis=-1)
    directions = ['DOWN', 'UP']
    pred_direction = directions[pred_idx]
    return (pred_direction, pred_prob)
def img_deidentifier(url):
    img = url_to_img(url)
    input_string = img_OCR(img)
    processed_string = EHR_data_extractor(input_string)
    now = datetime.now()
    deid_img_path = ("static/" + "deidentified_"
                     + now.strftime("%d-%m-%Y_%H:%M:%S:%f") + ".txt")
    with open(deid_img_path, "a") as f:
        f.write(processed_string)
    return deid_img_path
def pdf_deidentifier(url):
    pdf_file_path = url_to_pdf(url)
    txt_file_path = pdf_OCR(pdf_file_path)
    with open(txt_file_path, "r") as f:
        input_string = f.read()
    processed_string = EHR_data_extractor(input_string)
    now = datetime.now()
    deid_pdf_path = ("static/" + "deidentified_"
                     + now.strftime("%d-%m-%Y_%H:%M:%S:%f") + ".txt")
    with open(deid_pdf_path, "a") as f:
        f.write(processed_string)
    return deid_pdf_path
def save_clean_csv_dataset(raw_csv_dataset_path: str = None):
    dataset = read_csv(raw_csv_dataset_path,
                       parse_dates=[['year', 'month', 'day', 'hour']],
                       index_col=0,
                       date_parser=ModelUtils.parse_date)
    dataset.drop('No', axis=1, inplace=True)
    # Manually specify column names
    dataset.columns = ['pollution', 'dew', 'temp', 'press',
                       'wnd_dir', 'wnd_spd', 'snow', 'rain']
    dataset.index.name = 'date'
    # Mark all NA values with 0
    dataset['pollution'].fillna(0, inplace=True)
    now = datetime.now()  # current date and time
    timestamp_str = now.strftime("%m-%d-%Y-%H-%M-%S-%f")
    dataset_name = f"dataset_{timestamp_str}.csv"
    dataset_path = os.path.join('assets', 'datasets', dataset_name)
    # Save to file
    dataset.to_csv(dataset_path)
    return dataset_path
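# Usage sketch (illustrative): 'raw_pollution.csv' is a placeholder path, not a
# file named in the original source; the cleaned copy lands under assets/datasets/.
clean_path = save_clean_csv_dataset('raw_pollution.csv')
print(f'cleaned dataset written to {clean_path}')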
def remaining():
    unitBan = {}
    for filename in os.listdir('commission-residual'):
        data1 = read_csv('commission-residual/' + filename, encoding='utf-8',
                         low_memory=False, keep_default_na=False, na_values=[''])
        for unit in data1['unit_number']:
            unitBan[unit] = data1.loc[data1['unit_number'] == unit, 'ban_no'].values[0]
    print('done reading units to ban')
    data = read_csv('crmclientrecords.csv', encoding='utf-8')
    for unit in list(data['unit_number'].values):
        data.loc[data['unit_number'] == unit, 'ban_id'] = unitBan[unit]
    data['contract_term'].fillna(0, inplace=True)
    data['contract_expiration'] = to_datetime(data['contract_expiration'].astype(str),
                                              errors='coerce')
    data['contract_start_date'] = to_datetime(data['contract_start_date'].astype(str),
                                              errors='coerce')
    data['contract_expiration'] = (data['contract_start_date']
                                   + data['contract_term'].values.astype('timedelta64[M]'))
    for column in ['contract_expiration', 'contract_start_date']:
        data[column] = data[column].apply(lambda val: to_datetime(val) + DateOffset(months=13))
    data['contract_months_left'] = (datetime.now().date() - data['contract_start_date']).apply(
        lambda x: x.astype('timedelta64[M]') / np.timedelta64(1, 'M'))
    data.to_csv('crmclientrecords1.csv', encoding='utf-8', index=False)
def __init__(self, date_now=None, period=20, today=False,
             batch_size=30, use_tqdm=True):
    self.start_date = '20100104'
    if date_now:
        self.date_now = date_now
        self.today = False
    else:
        self.date_now = dt.now().strftime('%Y%m%d')
        self.today = True
    if today:
        self.today = True
    self.period = period
    self.batch_size = batch_size
    self.trade_date: np.ndarray = self._get_trade_date()
    self.trade_date_back: np.ndarray = self.trade_date[::-1]
    self.pre_date = self.trade_date[-2]
    self.ts_code_now = pro.daily(trade_date=self.date_now)['ts_code'].values
    self.stock_name_dict: dict = self._get_name_dict()
    self.stock2bond_dict, self.bond2stock_dict, \
        self.cb_stock_list, self.cb_bond_list, \
        self.cb_bond_name_dict = self._load_cvt_bond_tushare()
    self.use_tqdm = use_tqdm
def alvin_arima(df0, lookback=40, avg_close_days=1, correlation_sig=0.33):
    df = df0.copy()
    # avg_close can be set to the average of the last N close values to smooth
    # (avg_close_days=3 -> avgclose = (close + close[1] + close[2]) / 3)
    if avg_close_days == 1:
        df['avg_close'] = df['Close']
    else:
        df['avg_close'] = (df['Close'] + sum(
            [df['Close'].shift(x) for x in range(1, avg_close_days)])) / avg_close_days
    df['diff'] = df['avg_close'] - df['avg_close'].shift(1)
    df['diff1'] = df['diff'] - df['diff'].shift(1)
    df['diff2'] = df['diff'] - df['diff'].shift(2)
    df['diff3'] = df['diff'] - df['diff'].shift(3)
    df['diff4'] = df['diff'] - df['diff'].shift(4)
    df['diff5'] = df['diff'] - df['diff'].shift(5)
    df['diff6'] = df['diff'] - df['diff'].shift(6)
    df.dropna(inplace=True)
    df['ARIMA'] = np.nan
    df.reset_index(drop=True, inplace=True)
    ix1 = 0
    ix2 = df.shape[0] - lookback + 1
    print("Running regression to index {0}...".format(ix2))
    start_time = datetime.now()
    for ix in range(ix1, ix2):
        a1 = np.array(df.loc[ix:ix + lookback - 1, 'diff1'])
        a2 = np.array(df.loc[ix:ix + lookback - 1, 'diff2'])
        a3 = np.array(df.loc[ix:ix + lookback - 1, 'diff3'])
        a4 = np.array(df.loc[ix:ix + lookback - 1, 'diff4'])
        a5 = np.array(df.loc[ix:ix + lookback - 1, 'diff5'])
        a6 = np.array(df.loc[ix:ix + lookback - 1, 'diff6'])
        ac = [alvin_coefficient(a1, a2),
              alvin_coefficient(a1, a3),
              alvin_coefficient(a1, a4),
              alvin_coefficient(a1, a5),
              alvin_coefficient(a1, a6)]
        opti = optimal_coefficient(ac, correlation_sig)
        if opti == 0:
            lra = linregarray(a1)
        elif opti == 1:
            lra = linregarray(a2)
        elif opti == 2:
            lra = linregarray(a3)
        elif opti == 3:
            lra = linregarray(a4)
        elif opti == 4:
            lra = linregarray(a5)
        elif opti == 5:
            lra = linregarray(a6)
        else:
            print("ERROR: No optimal lag found for ix={0}".format(ix))
            lra = linregarray(a1)
        if ix % 100 == 0:
            print("{0:2d}: {1} {2:.2f} {3:.2f} {4:.2f} {5:.2f} {6:.2f} {7:.6f}"
                  .format(ix, opti, ac[0], ac[1], ac[2], ac[3], ac[4], lra))
        df.loc[ix + lookback, 'ARIMA'] = lra
    elapsed = datetime.now() - start_time
    print("elapsed time: {0}".format(elapsed))
    # Note: a Bartlett window gives a triangular-weighted moving average, not a
    # true exponential moving average (that would be df['diff'].ewm(...).mean())
    df['EMA'] = df['diff'].rolling(window=lookback, win_type='bartlett').mean()
    df.drop(['diff', 'diff1', 'diff2', 'diff3', 'diff4', 'diff5', 'diff6'],
            axis=1, inplace=True)
    forecast = df.iloc[-1]['ARIMA']
    df.dropna(inplace=True)
    return df, forecast
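# Usage sketch for alvin_arima (illustrative, not from the original source):
# assumes `df0` is an OHLC DataFrame with a 'Close' column, e.g. as produced by
# the get_historical()/get_stock_data() helpers above.
result_df, next_forecast = alvin_arima(df0, lookback=40, avg_close_days=3)
print('one-step forecast of the price difference:', next_forecast)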
import pvlib
from datetime import datetime

print(pvlib.solarposition.get_solarposition(datetime.now(), 55, -2, altitude=200))
def __init__(self, master):
    self.master = master
    self.crudeval = StringVar()
    self.crudeval1 = StringVar()
    self.username = StringVar()
    self.password = StringVar()
    self.first = None
    self.second = None
    self.third = None
    self.fourth = None
    self.StartUpDateTime = datetime.now()
    self.DefaultYear = self.StartUpDateTime.year
    self.DefaultMonthNumber = self.StartUpDateTime.month
    self.DefaultDayOfMonth = self.StartUpDateTime.day
    self.YearAndMonthLengths = [365, 31, 28, 31, 30, 31, 30,
                                31, 31, 30, 31, 30, 31]
    self.EnglishMonthNames = ('Entire Year', 'January', 'February', 'March',
                              'April', 'May', 'June', 'July', 'August',
                              'September', 'October', 'November', 'December')
    self.Month_Names = self.EnglishMonthNames
    self.DaysInMonth = OrderedDict()
    for i in range(0, len(self.Month_Names)):
        self.DaysInMonth[self.Month_Names[i]] = self.YearAndMonthLengths[i]
    self.DefaultMonthName = self.Month_Names[self.DefaultMonthNumber]
    self.DefaultMonthLength = self.DaysInMonth[self.DefaultMonthName]
    # Initialize the Spinbox interface variables to today's date
    self.SelectedYear = IntVar(value=self.DefaultYear)
    self.SelectedMonthName = StringVar(value=self.DefaultMonthName)
    self.SelectedMonthLength = IntVar(value=self.DefaultMonthLength)
    self.SelectedDay = IntVar(value=self.DefaultDayOfMonth)
    self.SelectedYear1 = IntVar(value=self.DefaultYear)
    self.SelectedMonthName1 = StringVar(value=self.DefaultMonthName)
    self.SelectedMonthLength1 = IntVar(value=self.DefaultMonthLength)
    self.SelectedDay1 = IntVar(value=self.DefaultDayOfMonth)
    self.SelectedYear2 = IntVar()
    self.SelectedMonthName2 = StringVar(value=self.DefaultMonthName)
    self.SelectedMonthLength2 = IntVar(value=self.DefaultMonthLength)
    self.SelectedDay2 = IntVar(value=self.DefaultDayOfMonth)
    self.SelectedYear3 = IntVar()
    self.SelectedMonthName3 = StringVar(value=self.DefaultMonthName)
    self.SelectedMonthLength3 = IntVar(value=self.DefaultMonthLength)
    self.SelectedDay3 = IntVar(value=self.DefaultDayOfMonth)
    self.SelectedYear11 = IntVar(value=self.DefaultYear)
    self.SelectedMonthName11 = StringVar(value=self.DefaultMonthName)
    self.SelectedYear12 = IntVar(value=self.DefaultYear)
    self.SelectedMonthName12 = StringVar(value=self.DefaultMonthName)
    self.code = 0
    self.code1 = 0
    self.MonthSpinBox = None
    self.YearSpinBox = None
    self.predictbtn = None
    self.DaySpinBox = None
    self.bgtabs = '#3B3B98'
    self.bgselected = '#1B9CFC'
    self.countries = ["France", "Germany", "Italy", "Spain", "United Kingdom",
                      "Japan", "Canada", "U.S.A", "India"]
# series = read_csv('sales-of-shampoo-over-a-three-ye.csv', header=0,
#                   parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)
series = read_csv('2018_04_26_Zaqatala_Shuvalan.csv', header=0, index_col=0)
series = series[['odd_1']]
# configure
n_lag = 1
n_seq = 3
n_test = 10
n_epochs = 20
n_batch = 1
n_neurons = 1
# prepare data
scaler, train, test = prepare_data(series, n_test, n_lag, n_seq)
# fit model
print('training model...')
model = fit_lstm(train, n_lag, n_seq, n_batch, n_epochs, n_neurons)
# make forecasts
print('making forecasts...')
forecasts = make_forecasts(model, n_batch, train, test, n_lag, n_seq)
# inverse transform forecasts and test
forecasts = inverse_transform(series, forecasts, scaler, n_test + 2)
actual = [row[n_lag:] for row in test]
actual = inverse_transform(series, actual, scaler, n_test + 2)
# evaluate forecasts
evaluate_forecasts(actual, forecasts, n_lag, n_seq)
# plot forecasts
plot_forecasts(series, forecasts, n_test + 2)
if input('save model? (y/n)') == 'y':
    save_model_to_file(model, 'model' + datetime.now().strftime('%Y-%m-%d-%H_%M_%S'))
from __future__ import division

import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
from pandas_datareader import DataReader
from datetime import datetime

tech_list = ['AAPL']  # ,'GOOG','MSFT','AMZN','NVDA','FB']
end = datetime.now()
start = datetime(end.year - 5, end.month, end.day)
for stock in tech_list:
    globals()[stock] = DataReader(stock, 'yahoo', start, end)
closing_df = DataReader(tech_list, 'yahoo', start, end)['Adj Close']
last_price = closing_df.iloc[-1].iloc[-1]
# print(closing_df)
tech_rets = closing_df.pct_change()
rets = tech_rets.dropna()
AAPL['Daily Return'] = AAPL['Adj Close'].pct_change()
# NVDA['Daily Return'] = NVDA['Adj Close'].pct_change()
days = 365
dt = 1 / days
mu = rets.mean()
sigma = rets.std()
def search(criteria, supplier=None, display_all_columns=False):
    night_count = len(criteria['check_in_range'])
    if supplier is None:  # pragma: no cover
        supplier = settings.DEFAULT_SUPPLIER
    get_rates = getattr(datafeeds, 'get_' + supplier + '_rates')
    rates, entire_stay_costs = get_rates(criteria)
    log_size(rates, 'rates')
    log_size(entire_stay_costs, 'entire_stay_costs')
    rates = datafeeds.filter_out_unmapped_hotels(rates)

    max_switch_distance_in_km = settings.MAX_SWITCH_DISTANCE_IN_KM
    max_review_tier_decrease = settings.MAX_REVIEW_TIER_DECREASE
    # Paris is the largest city by number of hotels. While the default settings
    # work well for other cities, Paris requires some overrides
    if 2 < float(criteria['longitude']) < 2.6 \
            and 48.8 < float(criteria['latitude']) < 50:  # pragma: no cover
        logger.info('PARIS EXCEPTION APPLIED')
        max_switch_distance_in_km = int(
            os.getenv('PARIS_MAX_SWITCH_DISTANCE_IN_KM', 2))
        max_review_tier_decrease = int(
            os.getenv('PARIS_MAX_REVIEW_TIER_DECREASE', 0))

    start_time = datetime.now()
    switches = switch.construct_switches(criteria, entire_stay_costs,
                                         max_switch_distance_in_km,
                                         max_review_tier_decrease)
    end_time = datetime.now()
    elapsed_time = end_time - start_time
    logger.info('Switch construction took {}s'.format(elapsed_time))
    log_size(switches, 'switches')

    stays = algorithm.construct_stays(rates, criteria['check_in_range'], switches)
    log_size(stays, 'initial stays')
    stays = outputs.add_metadata_to_stays(stays)
    stays = outputs.add_benchmark_to_stays(stays)
    stays = filter_and_sort.filter_stays(stays,
                                         sample_rate=2,
                                         min_saving=-25,
                                         min_saving_percentage=-0.05,
                                         max_upgrade_cost=50,
                                         max_upgrade_cost_percentage=0.5)
    log_size(stays, 'filtered stays')
    stays = outputs.add_rate_information_to_stays(stays, rates)
    log_size(stays, 'filtered stays + rate info')
    stays = filter_and_sort.sort_stays(stays, night_count)
    stays = outputs.make_hotel_ids_int(stays)
    if stays['switch_count'].max() > 0:
        stays = outputs.add_switching_benefit(stays, criteria['currency'])
    if not display_all_columns:  # pragma: no cover
        stays = outputs.remove_no_longer_required_columns(stays)
    stays = outputs.round_data(stays)
    return rates, stays
import os
import datetime as dt
from datetime import datetime, timedelta
import _locale

from airflow import DAG
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
import psycopg2
from psycopg2 import OperationalError
from googleads import ad_manager

_locale._getdefaultlocale = (lambda *args: ['en_US', 'UTF-8'])

client = ad_manager.AdManagerClient.LoadFromStorage(
    r'/Users/christian/Documents/Work/gam_api/ad_manager_keyfile.yml')
report_downloader = client.GetDataDownloader(version='v202005')

end_date = datetime.now().date()
start_date = end_date - timedelta(days=1)

network_service = client.GetService('NetworkService', version='v202011')
current_network = network_service.getCurrentNetwork()
print('Found network %s (%s)!' %
      (current_network['displayName'], current_network['networkCode']))

report_job = {
    'reportQuery': {
        'dimensions': ['AD_UNIT_ID', 'GPR_DEMOGRAPHICS'],
        'columns': [
            'TOTAL_CODE_SERVED_COUNT',
            'TOTAL_LINE_ITEM_LEVEL_IMPRESSIONS',
            'TOTAL_LINE_ITEM_LEVEL_ALL_REVENUE',
            'TOTAL_LINE_ITEM_LEVEL_CTR'
def __init__(self):
    self.net = {}
    self.load()
    # Three levels up from this file is the project root
    project_root = Path(__file__).parent.absolute().parent.absolute().parent.absolute()
    self.data_path = str(project_root) + '/models/data.csv'
    self.log_dir = (str(project_root) + '/models/' + "logs/fit/"
                    + datetime.now().strftime("%Y%m%d-%H%M%S"))
    self.tensorboard_callback = tensorflow.keras.callbacks.TensorBoard(
        log_dir=self.log_dir, histogram_freq=1)
def execute_search(criteria, session_key, reply_channel):
    run_from_management_command = criteria.get('data_mining')
    # Check-in range pre-calculated when running analytics
    if not run_from_management_command:  # pragma: no cover
        check_in = datetime.strptime(criteria['checkIn'], '%Y-%m-%d')
        check_out = datetime.strptime(criteria['checkOut'], '%Y-%m-%d')
        criteria['check_in_range'] = date_range(check_in,
                                                check_out - DateOffset(days=1))
    night_count = len(criteria['check_in_range'])
    outbound_message = {
        'status': '200',
        'currency': criteria['currency'],
        'currency_symbol': settings.CURRENCY_SYMBOLS[criteria['currency']],
        'country': criteria['country'],  # Blank if not country search
        'night_count': night_count,
    }
    try:
        criteria['city'] = unquote(criteria['city'])
        criteria['county'] = unquote(criteria['county'])
        criteria['state'] = unquote(criteria['state'])
        criteria['country'] = unquote(criteria['country'])
        if criteria['country'] in settings.BLOCKED_COUNTRIES:
            # We no longer permit searches for certain high-risk countries due
            # to high levels of attempted fraud. We block them in the front-end
            # but have this additional safeguard in case they are smart enough
            # to edit the URL directly (and another later in case they figure
            # out how to submit a search without a country parameter, but
            # better to catch them as early as possible)
            logger.error(
                'Someone tried searching for a blocked country {} via results URL'
                .format(criteria['country']))
            raise Exception
        _, stays = execute.search(criteria)
        if not run_from_management_command:  # pragma: no cover
            search_key = utils.create_session_key(
                unquote(criteria['place_name']),
                criteria['checkIn'],
                criteria['checkOut'],
                criteria['occupants'],
                criteria['latitude'],
                criteria['longitude'],
                criteria['currency'],
            )
            # Store complete record (including lengthy rateKey information)
            # for later use in stay detail view
            http_session = SessionStore(session_key=session_key)
            http_session[search_key] = {
                'stays': stays.to_json(),
                'timestamp': datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
            }
            http_session.save()
        fields_required_on_results_page = [
            'default_sort',
            'hotel_1_id', 'check_in_1', 'night_count_1', 'entire_stay_cost_1',
            'hotel_2_id', 'night_count_2', 'entire_stay_cost_2',
            'switch_count', 'distance_in_km',
            'rounded_stay_cost', 'rounded_nightly_cost', 'benchmark_stay_cost',
            'primary_star_rating', 'review_score', 'min_review_tier',
            'primary_review_tier', 'refundable',
        ]
        required_fields_only_present_in_multi_night_search = [
            'check_in_2',
            'cost_delta_vs_stay_benchmark',
            'percentage_cost_delta_vs_stay_benchmark',
            'switch_benefit',
        ]
        if stays['switch_count'].max() > 0:  # pragma: no cover
            fields_required_on_results_page = \
                fields_required_on_results_page + required_fields_only_present_in_multi_night_search
            max_saving = abs(stays['percentage_cost_delta_vs_stay_benchmark'].min())
            if max_saving >= 0.3:
                log_max_saving(criteria, max_saving)
        if run_from_management_command:  # pragma: no cover
            # Hotel info not required; pass back to calling command
            fields_required_for_data_mining = ['stay_cost', 'cost_per_quality_unit']
            fields_required_on_results_page = \
                fields_required_on_results_page + fields_required_for_data_mining
            return stays[fields_required_on_results_page]
        outbound_message['stays'] = \
            stays[fields_required_on_results_page].to_json(orient='records')
        hotel_id_columns = stays.columns.str.contains(r'hotel_[\d]_id')
        hotel_ids = melt(stays.loc[:, hotel_id_columns]).dropna()['value'].unique()
        hotels = Hotel.objects.filter(hotel_id__in=hotel_ids).select_related().iterator()
        hotels = [{
            'hotel_id': str(hotel.hotel_id),  # String required for use as key
            'name': hotel.name,
            'star_rating': hotel.star_rating,
            'main_image_url': hotel.main_image_url,
            'recommendations': hotel.trustyou.recommendations,
            'summary': hotel.trustyou.summary,
            'trust_score': hotel.trustyou.trust_score,
            'trust_score_description': hotel.trustyou.trust_score_description,
            'review_count': hotel.trustyou.review_count,
            'category_badge': hotel.trustyou.category_badge,
            'latitude': hotel.latitude,
            'longitude': hotel.longitude,
        } for hotel in hotels]
        hotels = DataFrame(hotels)
        hotels.set_index('hotel_id', inplace=True)
        outbound_message['hotels'] = hotels.to_dict('index')
        min_stay_cost = stays['stay_cost'].min()
        max_stay_cost = stays['stay_cost'].max()
        try:  # pragma: no cover
            min_switch_distance = int(stays['distance_in_km'].min())
            max_switch_distance = int(stays['distance_in_km'].max())
        except ValueError:
            min_switch_distance = 0
            max_switch_distance = 0
        min_nightly_cost = min_stay_cost / night_count
        max_nightly_cost = max_stay_cost / night_count
        outbound_message['cost_ranges'] = {
            'minStayCost': floor(min_stay_cost),
            'maxStayCost': ceil(max_stay_cost),
            'minNightlyCost': floor(min_nightly_cost),
            'maxNightlyCost': ceil(max_nightly_cost),
        }
        outbound_message['distance_ranges'] = {
            'minDistanceSwitch': min_switch_distance,
            'maxDistanceSwitch': max_switch_distance,
        }
    except (RequestError, NoResultsError):
        error = 'RequestError or NoResultsError when searching for {}'.format(
            unquote(criteria['place_name']))
        client.captureMessage(error)
        outbound_message['status'] = '503'
        logger.error(error)
        if run_from_management_command:  # pragma: no cover
            return DataFrame()
    except Exception:  # pragma: no cover
        outbound_message['status'] = '500'
        exception_type, _, exception_traceback = sys.exc_info()
        logger.error(exception_type)
        logger.error(pprint.pformat(traceback.format_tb(exception_traceback, limit=4)))
        if run_from_management_command:
            return DataFrame()
    if reply_channel is not None:  # pragma: no cover
        # This is actually tested but coverage can't detect it
        Channel(reply_channel).send({"text": json.dumps(outbound_message)})
    if outbound_message['status'] == '200':
        return True
weights = df_train.WEIGHTING
X_train = df_train[col_filter]
y_train = df_train.DEFAULT_FLAG

#%%
import lightgbm as lgb
#import pbar

#%%
myseed = 888
cv_folds = 5
ttu = 14
max_trees = 10000

#%% objective function
date = str(dt.now().year) + str(dt.now().month) + str(dt.now().day)

def lgb_x_val_auc(param_list):
    lr = param_list[0]
    spw = param_list[1]
    mb = int(param_list[2])
    nl = int(param_list[3])
    mcw = int(param_list[4])
    ss = param_list[5]
    csbt = param_list[6]
    alpha = param_list[7]
    mgts = param_list[8]
    mdil = int(param_list[9])
    rl = param_list[10]
def save_handwritten_digit(screen):
    # Note: the ':' in the timestamp is not a valid filename character on Windows
    image_name = "test_" + datetime.now().strftime('%Y-%m-%d %H:%M')
    save_path = './test_data/{}.png'.format(image_name)
    pygame.image.save(screen, save_path)
    load_image(save_path)
def get_performance():
    if request.method == "POST":
        print(str(request.form))
        keys = list(request.form.keys())
        print(keys)
        keys.remove('src')
        print(keys)
        idx = []
        for key in keys:
            idx.append(request.form[key])
        print(idx)
        tr_by_date_df = pd.read_sql_table('transaction_' + str(current_user.get_id()),
                                          db.engine, index_col='date')
        symbols = pf.get_symbols(tr_by_date_df)
        # Try getting rates from sql and check that they are up to date
        try:
            cumrates = pd.read_sql_table('cumrates' + str(current_user.get_id()),
                                         db.engine, index_col='date')
            cumrates = Series(cumrates['0'], index=cumrates.index)
            today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
            last_b_day = pd.date_range(start=today - Week(), end=today, freq=BDay())[-1] - BDay(1)
            last_day = cumrates.index[-1].to_pydatetime()
            print(today)
            print(last_day)
            print(last_b_day)
            if last_day != last_b_day:
                [worth, cumrates, invalid] = pf.get_rates_df(
                    pf.get_holdings(tr_by_date_df, symbols), symbols, tr_by_date_df)
                cumrates.to_sql('cumrates' + str(current_user.get_id()),
                                db.engine, if_exists='replace')
                worth.to_sql('worth' + str(current_user.get_id()),
                             db.engine, if_exists='replace')
        except Exception:
            [worth, cumrates, invalid] = pf.get_rates_df(
                pf.get_holdings(tr_by_date_df, symbols), symbols, tr_by_date_df)
            cumrates.to_sql('cumrates' + str(current_user.get_id()),
                            db.engine, if_exists='replace')
            worth.to_sql('worth' + str(current_user.get_id()),
                         db.engine, if_exists='replace')
        [idx_rates, invalid] = pf.get_index_rates(pf.get_cashflow(tr_by_date_df), idx)
        if len(invalid) > 0:
            err = "Invalid symbols: " + str(invalid)
        else:
            err = ""
        if request.form['src'] == 'max':
            print('requesting max')
            return jsonify(post_data(cumrates, idx_rates))
        elif request.form['src'] == 'ytd':
            print('requesting ytd')
            # Get last business day of last year
            last_day_of_last_year = pd.date_range('12/1/' + str(date.today().year - 1),
                                                  periods=1, freq='BM')
            # Get cumrates from the beginning of the year, divided by the last
            # day of the previous year
            ytd_cumrates = (cumrates[str(date.today().year) + '-1-1':date.today()]
                            / cumrates[last_day_of_last_year[0]])
            # Repeat for each index
            ytd_idx = idx_rates
            for i in idx:
                ytd_idx[i] = (idx_rates[i][str(date.today().year) + '-1-1':date.today()]
                              / idx_rates[i][last_day_of_last_year[0]])
            return jsonify(post_data(ytd_cumrates, ytd_idx))
        elif request.form['src'] == '1month':
            print('requesting 1month')
            delta = BDay(20)
            last = BDay(21)
            month_cumrates = (cumrates[cumrates.index[-1] - delta:cumrates.index[-1]]
                              / cumrates[cumrates.index[-1] - last])
            month_idx = idx_rates
            for i in idx:
                month_idx[i] = (idx_rates[i][idx_rates[i].index[-1] - delta:idx_rates[i].index[-1]]
                                / idx_rates[i][idx_rates[i].index[-1] - last])
            return jsonify(post_data(month_cumrates, month_idx))
        elif request.form['src'] == '1year':
            print('requesting 1year')
            delta = BDay(250)
            last = BDay(251)
            year_cumrates = (cumrates[cumrates.index[-1] - delta:cumrates.index[-1]]
                             / cumrates[cumrates.index[-1] - last])
            year_idx = idx_rates
            for i in idx:
                year_idx[i] = (idx_rates[i][idx_rates[i].index[-1] - delta:idx_rates[i].index[-1]]
                               / idx_rates[i][idx_rates[i].index[-1] - last])
            return jsonify(post_data(year_cumrates, year_idx))
    return Response("ok")