def main():
    """Plot the price series and the closing-price difference, saving both figures.

    Loads the data with ``get_df(DATA_PATH, columns)``, passes each price
    column together with the ``Datetime`` column through ``add_index`` and
    writes two PNG files into ``fig_path``:

    * ``stock_price.png`` - opening, high, low and closing series on one axis.
    * ``previous_day_difference.png`` - ``diff()`` of the closing series.
    """
    frame = get_df(DATA_PATH, columns)

    # Legend label -> source column, in plotting order.
    column_by_label = (
        ('Opening', 'Opening_price'),
        ('High', 'High_price'),
        ('Low', 'Low_price'),
        ('Closing', 'Closing_price'),
    )
    series_by_label = {
        label: add_index(frame[column], frame['Datetime'])
        for label, column in column_by_label
    }

    # Figure 1: all four price series.
    plt.figure(figsize=(12, 4))
    plt.title('Stock Price', fontsize=16)
    for label, series in series_by_label.items():
        plt.plot(series, label=f'{label} Price', linestyle='-.')
    plt.legend(bbox_to_anchor=(1.1, 1), loc='upper right')
    plt.savefig(os.path.join(fig_path, 'stock_price.png'))

    # Figure 2: element-to-element difference of the closing series.
    plt.figure(figsize=(12, 4))
    plt.title('Previous day difference (Closing Price)', fontsize=16)
    plt.plot(series_by_label['Closing'].diff())
    plt.savefig(os.path.join(fig_path, 'previous_day_difference.png'))
def get_df_with_binary_status():
    '''
    Replace the categorical 'status' column of the DataFrame returned by
    get_df() with one indicator column per status value ('Developed' and
    'Developing').

    A 'Developed' country has 1 in the 'Developed' column and 0 in the
    'Developing' column, and vice versa for a 'Developing' country.

    Returns the resulting DataFrame: every original column except 'status',
    joined on the index with the indicator columns.
    '''
    df = get_df()

    # One-hot encode 'status'. The dtype is pinned explicitly because
    # pandas >= 2.0 changed the get_dummies default from uint8 to bool, which
    # would produce True/False instead of the documented 1/0 values.
    binary_status = pd.get_dummies(df.status, dtype='uint8')

    return df.drop(columns='status').join(binary_status)
def get_keywords(article, k=100):
    """Return the highest-scoring words of ``article`` by a TF-IDF style score.

    Each distinct word is scored as ``tf * log(dc / (df[word] + 1))`` where
    ``tf`` is the number of occurrences of the word in ``article`` and
    ``dc, df`` are the two values returned by ``get_df()`` (used here as a
    total document count and a ``word -> document frequency`` mapping with a
    ``.get`` method - inferred from how they are used in this function).

    Args:
        article: Iterable of sentences, each an iterable of word strings.
        k: Number of top-scoring candidates to keep.

    Returns:
        The stripped words in descending score order. Words that are empty
        after stripping are dropped *after* the top-``k`` cut, so fewer than
        ``k`` items may be returned. Words with equal scores keep the order
        in which they first appear in ``article``.
    """
    from collections import Counter

    doc_count, doc_freq = get_df()

    # Counter preserves first-seen order. Iterating it (instead of a set)
    # makes tie-breaking - and therefore the top-k selection - reproducible
    # across runs regardless of string hash randomisation.
    term_freq = Counter(word for sentence in article for word in sentence)

    scored = [
        # +1 keeps the denominator non-zero for words absent from doc_freq.
        (word, count * math.log(doc_count / (doc_freq.get(word, 0) + 1)))
        for word, count in term_freq.items()
    ]
    # list.sort is stable, so equal scores stay in first-seen order.
    scored.sort(key=lambda pair: pair[1], reverse=True)

    stripped = (word.strip() for word, _ in scored[:k])
    return [word for word in stripped if word]
def formirovka_result(path: str = config.PATH):
    """Build the summary workbook and the charts workbook for every file in ``path``.

    For each file returned by ``get_files_from_dir(path)`` the function calls
    ``preparing_for_charts`` (which yields the sample name, the file path and
    an updated ``config_set``), loads the data with ``get_df`` and computes a
    result with ``get_result_formirovka``. All results are concatenated and
    written to ``{path}/result/result.xlsx``; the per-sample data frames are
    handed to ``FormirovkaCharts`` which writes ``{path}/result/charts.xlsx``.

    Args:
        path: Directory that holds the input files and the ``result`` folder.
    """
    files = get_files_from_dir(path)
    config_set = Config(path)
    result_parts = []
    data_to_excel = {}

    for file in files:
        sample, file_path, config_set = preparing_for_charts(
            path, file, config_set)
        df = get_df(file_path)
        part = get_result_formirovka(df, config_set.config)
        # DataFrame.append used to turn a named Series into a single row;
        # keep that behaviour in case the helper returns a Series
        # (its return type is not visible here - TODO confirm).
        if isinstance(part, pd.Series):
            part = part.to_frame().T
        result_parts.append(part)
        data_to_excel[sample] = df

    # DataFrame.append was removed in pandas 2.0 and copied the whole frame on
    # every call; concatenate once instead. pd.concat rejects an empty list,
    # so fall back to the empty frame the original code produced.
    result = pd.concat(result_parts) if result_parts else pd.DataFrame()
    result.to_excel(f"{path}/result/result.xlsx")

    # One chart description and one anchor cell per sample.
    charts = [("Diffrent Cycles", sample, "Specific capacity,mA h/g", "Voltage, V")
              for sample in data_to_excel]
    cells = ["I2"] * len(data_to_excel)
    save_path = f"{path}/result/charts.xlsx"

    form_chart = FormirovkaCharts(save_path, charts, cells, data_to_excel)
    form_chart.insert_data()
    form_chart.close_writer()
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from analysis import FIRST_YEAR, LAST_YEAR, Color, get_countries_with_decreased_LE
from get_data import SRC_DIR, get_df

# Data set shared by the plotting helpers in this module (loaded once at import time).
df = get_df()

# Groups of column names that can be passed as `trend`.
IMMUNIZATIONS = ['hepatitis_b', 'polio', 'diphtheria']
THINNESS = ['thinness_5-9_years', 'thinness_10-19_years']
EARLY_MORTALITY = ['under-five_deaths', 'infant_deaths']

# Countries iterated by plot_decreasing_LE_trend (computed once at import time).
DECREASED_LE = get_countries_with_decreased_LE()
PALETTE = [Color.LIGHT_BLUE.value, Color.DARK_BLUE.value, Color.ORANGE.value]  # Colors for line graph


def plot_decreasing_LE_trend(trend, title, file_name, lower_limit=0, upper_limit=400):
    '''
    Plot a grid of side-by-side line graphs showing the specified trend for the
    countries whose LE decreased over the years spanned by the data.

    trend: list of column names; it is concatenated to ['country', 'year'] to
        select the columns of df that are used.
    title, file_name, lower_limit, upper_limit: not referenced in the visible
        part of this function - presumably the figure title, the output file
        name and the axis limits; confirm against the rest of the body.
    '''
    # Extract relevant DataFrame columns
    filtered_data = df[['country', 'year']+trend]

    # NOTE(review): Matplotlib 3.6 renamed this style to 'seaborn-v0_8-darkgrid'
    # and later releases dropped the old name - confirm the Matplotlib version in use.
    plt.style.use('seaborn-darkgrid')
    plt.figure(figsize=(10, 8))

    num = 0  # Index of subplot
    for country in DECREASED_LE:
        # Isolate relevant DataFrame rows, ordered chronologically
        country_data = filtered_data[filtered_data.country.eq(
            country)].sort_values(by='year', ascending=True)

        # Advance to the next subplot position (first country gets index 1)
        num += 1
# NOTE(review): this rebinds the name `config` from the callable to its result,
# so `config()` cannot be called again after this line.
config = config()

# Settings read from the configuration object.
DATA_PATH = config.path.data_path
columns = config.csv_columns
valid_len = config.length.valid_len
seq_length = config.length.seq_length  # window length used to slice test_inputs
fut_pred = config.length.fut_pred  # number of iterations main() runs
device = config.device
seed = config.seed
model_path = config.path.model_path
fig_path = config.path.fig_path
pred_path = config.path.pred_path

seed_everything(seed)

# Raw frame with a fresh 0..n-1 index, plus its closing-price column and length.
df = get_df(DATA_PATH, columns).reset_index(drop=True)
price = df['Closing_price']
df_len = len(df)

# Only the first and the last of the four values returned by get_data are used.
data, _, _, scaler = get_data(DATA_PATH, columns, valid_len)
# Seed window: the last seq_length entries of data, converted to a Python list.
test_inputs = data[-seq_length:].tolist()

# Build the model, move it to the configured device and load the saved weights.
# NOTE(review): torch.load is called without map_location - confirm that the
# checkpoint was saved from a device that is available where this script runs.
model = LSTM()
model.to(device)
model.load_state_dict(torch.load(model_path))


def main():
    """Run fut_pred iterations, each starting from the latest seq_length entries of test_inputs."""
    for _ in range(fut_pred):
        # Most recent seq_length entries of test_inputs as a float tensor.
        seq = torch.FloatTensor(test_inputs[-seq_length:])