Beispiel #1
0
def main():
    """Plot the four price series and the first difference of the closing price.

    Loads the table with ``get_df(DATA_PATH, columns)``, re-indexes each price
    column by the ``Datetime`` column through ``add_index`` and saves two
    figures into ``fig_path``: ``stock_price.png`` (all four series) and
    ``previous_day_difference.png`` (``diff()`` of the closing series).
    """
    df = get_df(DATA_PATH, columns)
    timestamps = df['Datetime']

    # (legend prefix, source column), in the order the lines are drawn.
    series_specs = (
        ('Opening', 'Opening_price'),
        ('High', 'High_price'),
        ('Low', 'Low_price'),
        ('Closing', 'Closing_price'),
    )
    indexed = {
        name: add_index(df[column], timestamps)
        for name, column in series_specs
    }

    plt.figure(figsize=(12, 4))
    plt.title('Stock Price', fontsize=16)
    for name, series in indexed.items():
        plt.plot(series, label=f'{name} Price', linestyle='-.')
    # The legend only needs to be built once, after every line has been added.
    plt.legend(bbox_to_anchor=(1.1, 1), loc='upper right')
    plt.savefig(os.path.join(fig_path, 'stock_price.png'))

    plt.figure(figsize=(12, 4))
    plt.title('Previous day difference (Closing Price)', fontsize=16)
    plt.plot(indexed['Closing'].diff())
    plt.savefig(os.path.join(fig_path, 'previous_day_difference.png'))
def get_df_with_binary_status():
    ''' Return the DataFrame from get_df() with its categorical 'status' column
    replaced by indicator columns, one per distinct status value (per the
    original description: 'Developed' and 'Developing'). In each row the column
    matching that row's status is set and the other one is not. '''
    frame = get_df()

    # One indicator column per distinct value found in 'status'
    status_indicators = pd.get_dummies(frame.status)

    # Swap the original categorical column for the indicator columns
    without_status = frame.drop(columns='status')
    return without_status.join(status_indicators)
Beispiel #3
0
def get_keywords(article, k=100):
    """Return up to ``k`` keywords of ``article`` ranked by a TF-IDF style score.

    Each word is scored as ``count_in_article * log(dc / (df.get(word, 0) + 1))``
    where ``dc, df`` come from ``get_df()``.

    Args:
        article: Iterable of sentences, each an iterable of word strings.
        k: Number of top-scoring words kept before blank entries are dropped.

    Returns:
        The whitespace-stripped words in descending score order. Words that are
        empty after stripping are removed, so fewer than ``k`` may be returned.
        Equal-score words keep their order of first appearance in ``article``.
    """
    # NOTE(review): presumably dc is the corpus document count and df maps
    # word -> document frequency; confirm against get_df().
    dc, df = get_df()

    # Dicts keep insertion order, so iterating the counts (rather than a set)
    # makes the ordering of equal-score words reproducible between runs.
    word_count = {}
    for sentence in article:
        for word in sentence:
            word_count[word] = word_count.get(word, 0) + 1

    scored = []
    for word, term_freq in word_count.items():
        # +1 keeps unseen words away from a division by zero.
        document_freq = df.get(word, 0) + 1
        scored.append((word, term_freq * math.log(dc / document_freq)))

    # list.sort is stable, also with reverse=True.
    scored.sort(key=lambda item: item[1], reverse=True)

    stripped = (word.strip() for word, _ in scored[:k])
    return [word for word in stripped if word]
Beispiel #4
0
def formirovka_result(path: str = config.PATH):
    """Write the summary workbook and the charts workbook for the files in ``path``.

    Every file returned by ``get_files_from_dir(path)`` is loaded with
    ``get_df``, summarised with ``get_result_formirovka`` and kept under its
    sample name for charting. The stacked summaries are written to
    ``{path}/result/result.xlsx`` and the charts to ``{path}/result/charts.xlsx``.

    Args:
        path: Directory passed to ``get_files_from_dir``; defaults to
            ``config.PATH``.
    """
    files = get_files_from_dir(path)
    config_set = Config(path)
    summaries = []
    data_to_excel = {}
    for file in files:
        # preparing_for_charts returns the config object to use from here on,
        # so it is carried over into the next iteration.
        sample, file_path, config_set = preparing_for_charts(
            path, file, config_set)
        df = get_df(file_path)
        summaries.append(get_result_formirovka(df, config_set.config))
        data_to_excel[sample] = df

    # DataFrame.append was removed in pandas 2.0 and copied the whole frame on
    # every call; concatenate once instead. An empty frame is still written
    # when there were no input files.
    # NOTE(review): assumes get_result_formirovka returns a DataFrame - confirm.
    result = pd.concat(summaries) if summaries else pd.DataFrame()
    result.to_excel(f"{path}/result/result.xlsx")

    # One chart description and one anchor cell per sample.
    charts = [("Diffrent Cycles", sample, "Specific capacity,mA h/g",
               "Voltage, V") for sample in data_to_excel]
    cells = ["I2"] * len(data_to_excel)
    save_path = f"{path}/result/charts.xlsx"
    form_chart = FormirovkaCharts(save_path, charts, cells, data_to_excel)
    form_chart.insert_data()
    form_chart.close_writer()
import matplotlib.pyplot as plt
from matplotlib.patches import Patch

from analysis import FIRST_YEAR, LAST_YEAR, Color, get_countries_with_decreased_LE
from get_data import SRC_DIR, get_df

# Dataset loaded once at import time; plot_decreasing_LE_trend reads it.
df = get_df()

# Groups of column names.
# NOTE(review): presumably passed as the `trend` argument of
# plot_decreasing_LE_trend, which selects them as DataFrame columns - confirm.
IMMUNIZATIONS = ['hepatitis_b', 'polio', 'diphtheria']
THINNESS = ['thinness_5-9_years', 'thinness_10-19_years']
EARLY_MORTALITY = ['under-five_deaths', 'infant_deaths']
# Countries iterated by plot_decreasing_LE_trend; computed at import time.
DECREASED_LE = get_countries_with_decreased_LE()
PALETTE = [Color.LIGHT_BLUE.value, Color.DARK_BLUE.value,
           Color.ORANGE.value]  # Colors for line graph


def plot_decreasing_LE_trend(trend, title, file_name, lower_limit=0, upper_limit=400):
    ''' Plot a grid of side-by-side line graphs showing specified trend for the countries
    whose LE decreased over the years spanned by the data.

    trend must be a list of column names: it is concatenated with
    ['country', 'year'] to select the columns of the module-level df.

    NOTE(review): the body visible here only prepares the per-country data and
    counts subplots; title, file_name, lower_limit and upper_limit are not used
    yet. The remainder of the function may be missing - confirm. '''
    # Extract relevant DataFrame columns
    filtered_data = df[['country', 'year'] + trend]

    # matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
    # 3.8 removed the old names; prefer the new name, fall back on old installs.
    style_name = 'seaborn-v0_8-darkgrid'
    if style_name not in plt.style.available:
        style_name = 'seaborn-darkgrid'
    plt.style.use(style_name)
    plt.figure(figsize=(10, 8))

    num = 0  # Index of subplot
    for country in DECREASED_LE:
        # Isolate relevant DataFrame rows, ordered chronologically
        country_data = filtered_data[filtered_data.country.eq(
            country)].sort_values(by='year', ascending=True)
        num += 1
# Build the project configuration. NOTE: this rebinds the name `config` from
# the callable to the object it returns.
config = config()
DATA_PATH = config.path.data_path
columns = config.csv_columns
valid_len = config.length.valid_len
seq_length = config.length.seq_length
fut_pred = config.length.fut_pred
device = config.device
seed = config.seed
model_path = config.path.model_path
fig_path = config.path.fig_path
pred_path = config.path.pred_path

# Seed before any data or model work.
seed_everything(seed)

df = get_df(DATA_PATH, columns).reset_index(drop=True)
price = df['Closing_price']
df_len = len(df)

# Only the first and last of the four values returned by get_data are used here.
data, _, _, scaler = get_data(DATA_PATH, columns, valid_len)

# Last `seq_length` entries of `data`, as a plain list; main() reads its tail
# to build each input sequence.
test_inputs = data[-seq_length:].tolist()

model = LSTM()
model.to(device)
# map_location lets a checkpoint saved on another device (e.g. a GPU) load on
# the configured one instead of failing when that device is unavailable.
model.load_state_dict(torch.load(model_path, map_location=device))


def main():
    for _ in range(fut_pred):
        seq = torch.FloatTensor(test_inputs[-seq_length:])