Example #1
0
import math
import scipy as sp
import pandas as pd
import numpy as np
from statistics import norm_pdf, norm_cdf  # project-local helper module (the stdlib statistics package has no norm_pdf/norm_cdf)
from matplotlib import pyplot as plt
from scipy import interpolate
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from BS_div import div_price_BS, div_delta_BS, div_gamma_BS, div_vega_BS, div_theta_BS, div_rho_BS
from vol_imp import vIP
#import datetime

# load data
data = pd.ExcelFile("vale3.xls")
#print(data.sheet_names)
# Define the columns to be read
columns1 = [
    'strike', 'bid', 'offer', 'halfs', 'Price Underlying', 'r', 'd', 'v', 'T',
    'Maturity', 'Code', 'Put/Call', 'Underlying Asset', 'Type Asset',
    'Exchange', 'Country'
]
data_opt = data.parse(u'1.1', names=columns1)
data_opt = data_opt.transpose()
data_opt = data_opt.values
# Variables and Parameters
S = data_opt[4, 0]  #S = Spot Price
r = data_opt[5, 0] / (252)  #r: Interest rate daily
# To estimate daily volatility, use all trades. Build a measure that weights by traded quantity (wtt). Also compare against the max and min.
d = data_opt[6, 0] / (252)  #d: dividend payout
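
# Sketch of the weighting idea from the note above (illustrative only): estimate daily
# volatility from all trades, weighting each squared log-return by the traded quantity.
trades = pd.DataFrame({"price": [41.20, 41.50, 41.30, 41.80],
                       "qty": [300, 150, 500, 200]})  # hypothetical tick data, not from vale3.xls
log_ret = np.log(trades["price"]).diff().dropna()
w = trades["qty"].iloc[1:] / trades["qty"].iloc[1:].sum()
vol_weighted = float(np.sqrt((w.values * log_ret.values ** 2).sum()))
vol_unweighted = float(log_ret.std())
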
    def load_workbook(self,
                      fname,
                      wv_user=None,
                      wv_password=None,
                      wv_host_name=None):
        """

        :param fname:
        :param wv_user: In case of use of WebDav server, user name
        :param wv_password: In case of use of WebDav server, password
        :param wv_host_name: In case of use of WebDav server, host name
        :return: Number of added DataFrames
        """
        # Load a XLSX workbook into memory, as dataframes
        pr = urlparse(fname)
        if pr.scheme != "":
            # Load from remote site
            if not wv_host_name:
                wv_host_name = "nextcloud.data.magic-nexus.eu"
            if pr.netloc.lower() == wv_host_name:
                # WebDAV
                parts = fname.split("/")
                for i, p in enumerate(parts):
                    if p == wv_host_name:
                        url = "/".join(parts[:i + 1]) + "/"
                        fname = "/" + "/".join(parts[i + 1:])
                        break

                options = {
                    "webdav_hostname": url,
                    "webdav_login": wv_user,
                    "webdav_password": wv_password
                }
                client = wc.Client(options)
                with tempfile.NamedTemporaryFile(delete=True) as temp:
                    client.download_sync(remote_path=fname,
                                         local_path=temp.name)
                    f = open(temp.name, "rb")
                    data = io.BytesIO(f.read())
                    f.close()
            else:
                data = urllib.request.urlopen(fname).read()
                data = io.BytesIO(data)
            xl = pd.ExcelFile(
                xlrd.open_workbook(file_contents=data.getvalue()),
                engine="xlrd")
        else:
            xl = pd.ExcelFile(fname)
        cont = 0
        for sheet_name in xl.sheet_names:
            df = xl.parse(sheet_name, header=0)
            # Manage columns
            cols = []
            for col in df.columns:
                col_parts = col.split(".")
                if col.lower().startswith("unnamed"):
                    cols.append("")
                elif len(col_parts) > 1:
                    try:
                        int(col_parts[1])  # This is the case of "col.1"
                        cols.append(col_parts[0])
                    except ValueError:  # This is the case of "col_part.col_part" (second part is a string)
                        cols.append(col)
                else:
                    cols.append(col)

            df.columns = cols
            self._dataframes.append(df)
            self._dataframe_names.append(sheet_name)
            cont += 1

        return cont
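
# Self-contained sketch of the column clean-up performed in the loop above: pandas
# mangles duplicate headers to "col.1", "col.2", ... and blank headers to "Unnamed: N".
# The sample frame below is illustrative, not one of the loaded workbooks.
sample = pd.DataFrame([[1, 2, 3]], columns=["Unnamed: 0", "value", "value.1"])
cleaned = ["" if c.lower().startswith("unnamed")
           else c.split(".")[0] if c.split(".")[-1].isdigit()
           else c
           for c in sample.columns]
print(cleaned)  # ['', 'value', 'value']
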
                            ZippedFilesloc, and path_processed_data in a new elif block")
path_to_helper = os.path.join(path_working,
                              "analysis",
                              "exploratory",
                              "03_Compare_rawnav_to_field_dir")
sys.path.append(path_to_helper)

# Import custom functions
import wmatarawnav as wr
from helper_function_field_validation import correct_data_types
from helper_function_field_validation import combine_field_rawnav_dat
from helper_function_field_validation \
    import quick_and_dirty_schedule_qjump_mapping
# 2. Read and clean field data
# -----------------------------------------------------------------------------
field_xlsx_wb = pd.ExcelFile(path_field_file)
field_dict = {}

col_keep = [
    'Metrobus Route', 'Bus ID', "Today's Date", 'Signal Phase',
    'Time Entered Stop Zone', 'Time Left Stop Zone', 'Front Door Open Time',
    'Front Door Close Time', 'Rear Door Open Time', 'Rear Door Close Time',
    'Dwell Time', 'Number of boardings', 'Number of alightings',
    'Total Time at Intersection', 'Traffic Conditions', 'Notes',
]
col_new_names = [
    'metrobus_route_field', 'bus_id_field', 'date_obs_field',
    'signal_phase_field', 'time_entered_stop_zone_field',
    'time_left_stop_zone_field', 'front_door_open_time_field',
    'front_door_close_time_field', 'rear_door_open_time_field',
    'rear_door_close_time_field', 'dwell_time_field',
Example #4
0
    'indicator_category': 'Social',
    'indicator_type': 'ICT',
    'multiplier': 1000,
    'prefix': 'eclac',
    'suffix': 'fb'
}

# <markdowncell>

# Generation 1 - This data was collected via the Facebook Advertising interface - https://www.facebook.com/ads/create

# <codecell>

file_name = "FacebookData.xlsx"
popfile = config["gen_1_dir"] + file_name
xlfile = pd.ExcelFile(popfile)
xlfile.sheet_names  # The source file has several different estimates available. We are just using the base scenario ('ESTIMATES'), plus the 'NOTES' sheet.

# <markdowncell>

# Generation 2 - Refines the rough csv data from Generation 1 into a standardized csv format common to all data sets. Prepares this data for importing to the database.

# <markdowncell>

# First, create the DataFrame with all population information. The "Country code" column contains the ISO 3166 numeric value, but as an integer rather than as a zero-padded 3-digit string. So we fix this to be in line with the ISO spec and, hence, with our data. We also rename the "Country code" field to match the name in our schema.
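
# <codecell>

# Minimal, self-contained sketch of the zero-padding fix described above; the column
# name comes from the note, but the sample values are illustrative, not from the source workbook.
example = pd.DataFrame({"Country code": [4, 32, 840]})
example["Country code"] = example["Country code"].astype(int).astype(str).str.zfill(3)
print(example["Country code"].tolist())  # ['004', '032', '840']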

# <codecell>

df = xlfile.parse("data", header=0)
df = df.rename(columns={
    "Country": "name",
Example #5
0
import cplex
import copy
import pandas as pd
import numpy as np
from dijkstrasalgoritm import dijkstra, graph_creator
from col_generation import col_generation

## Load data
xl = pd.ExcelFile("Input_AE4424_Ass1P1.xlsx")
dfs = {sheet: xl.parse(sheet) for sheet in xl.sheet_names}

## Create sets
arcs = range(len(dfs['Arcs'].Arc))
origins = dfs['Arcs'].From
destinations = dfs['Arcs'].To
locations = pd.concat([dfs['Arcs'].From,dfs['Arcs'].To]).unique()
commodities = range(1, len(dfs['Commodities'].Commodity) + 1)
quantity = np.array(dfs['Commodities'].Quant)

## Create input matrices
graph = graph_creator()
A_ineq = np.zeros((len(arcs),len(commodities)))
C = []
for i in range(len(commodities)):
    path, C_i = dijkstra(graph, dfs['Commodities'].From[i], dfs['Commodities'].To[i])
    C.append(float(C_i*quantity[i]))  # adjusted by multiplying by quantity[i]
    for j in range(len(path)-1):
        index = dfs['Arcs'].index[(dfs['Arcs'].From == path[j]) & (dfs['Arcs'].To == path[j+1])]
        A_ineq[index,i] = 1*dfs['Commodities'].Quant[i]
A_eq = np.eye(len(commodities))
rhs_ineq = list(dfs['Arcs'].Capacity)
Example #6
0
import numpy as np
import pandas as pd
import xlrd
import openpyxl

xlsx = pd.ExcelFile('imiona.xlsx')
df = pd.read_excel(xlsx, 'Arkusz1')
print(df)
Example #7
0
# In[1]:


import pandas as pd 
import numpy as np
import random
from math import e 


# In[44]:


# data = pd.read_csv('C:/Users/OmarVr/Documents/ITESO/Maestria/IDI/perceptron_simple.csv')
# data = pd.read_csv('C:/Users/if686748/Documents/perceptron_simple.csv')
#data = pd.ExcelFile('/Users/omar/Documents/Python/Maestria/IDI/tabla_para_probar.xlsx')
data = pd.ExcelFile('C:/Users/if686748/Downloads/tabla_para_probar.xlsx')
data = data.parse(data.sheet_names[0], header=0)
x = data.iloc[:,:-2]
d = data.iloc[:,-2:]
x


# In[45]:


N = len(x.iloc[0])  # number of inputs / variables / columns
M = 2  # number of outputs / dependent variables
Q = len(data)  # rows / learning patterns
L = N*M  # recommended hidden-layer size: N * M
print(N,M,Q,L)
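

# In[ ]:


# Sketch (not part of the original notebook) of how these dimensions would size the
# weight matrices of a network with one hidden layer of L units:
w_hidden = np.random.uniform(-1, 1, size=(N, L))   # inputs -> hidden
w_output = np.random.uniform(-1, 1, size=(L, M))   # hidden -> outputs
print(w_hidden.shape, w_output.shape)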
Example #8
0
import pandas as pd
import numpy as np

file1 = '../data/STRIDE_PATIENT.xlsx'
x1 = pd.ExcelFile(file1)
stride_patient = x1.parse('Sheet1')

file2 = '../data/SURGERY.xlsx'
x2 = pd.ExcelFile(file2)
surgery = x2.parse('Sheet1')

stride_patient_req = stride_patient
pat_surgery = pd.merge(stride_patient_req, surgery, on='PAT_DEID', how='inner')
pat_surgery['BIRTH_DATE'] = pat_surgery['BIRTH_DATE'].str[
    0:7] + '19' + pat_surgery['BIRTH_DATE'].str[7:]
pat_surgery['SURGERY_DATE'] = pat_surgery['SURGERY_DATE'].str[
    0:7] + '20' + pat_surgery['SURGERY_DATE'].str[7:]
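
# Worked illustration of the slicing above; the 'DD-MON-YY' date format is an assumption,
# not something confirmed by the STRIDE files.
_example = '01-JUL-62'
print(_example[0:7] + '19' + _example[7:])  # -> 01-JUL-1962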

pat_surgery['BIRTH_DATE'] = pd.to_datetime(pat_surgery['BIRTH_DATE'])
pat_surgery['SURGERY_DATE'] = pd.to_datetime(pat_surgery['SURGERY_DATE'])
print(pat_surgery.dtypes)

pat_surgery['Difference'] = pat_surgery['SURGERY_DATE'].sub(
    pat_surgery['BIRTH_DATE'], axis=0)
pat_surgery['AGE AT SURGERY'] = pat_surgery['Difference'] / np.timedelta64(
    365, 'D')
pat_surgery['AGE AT SURGERY'] = pat_surgery['AGE AT SURGERY'].astype(int)

pat_surgery = pat_surgery.drop(['BIRTH_DATE', 'SURGERY_DATE', 'Difference'],
                               axis=1)
print(pat_surgery.dtypes)
#_*_coding:utf-8_*_
import pandas as pd
import numpy as np
from docxtpl import DocxTemplate, InlineImage
from docx import shared
import os
# have_content = False

file = pd.ExcelFile("data\小于400三调精度.xlsx")
df = file.parse("小于400三调精度")

x_data = np.array(df)
x_list = x_data.tolist()

index = 0
while index < len(x_list):
    tpl = DocxTemplate("插图模板.docx")
    values = x_list[index]
    # values.pop()
    file_name = "data\\文档\\" + str(x_list[index][0]) + ".docx"
    gaoqing_name = "data\\高清\\" + x_list[index][3] + ".tif"
    update_name = "data\\更新\\" + x_list[index][3] + ".tif"

    if os.path.exists(gaoqing_name):
        if os.path.exists(update_name):
            lableID = values[1]
            context = \
                {
                    "col_labels": ["序号", "ID", "行政区代码", "地块序号", "地块面积(平方米)", \
                                   "地块面积(亩)", "核查结果"],
                    "infos": [InlineImage(tpl, gaoqing_name, width=shared.Cm(7.75), height=shared.Cm(7)),
def find_matches(teller, noemer, goktekst, match, file):
    def gcd(a, b):
        """Calculate the Greatest Common Divisor of a and b.

            Unless b==0, the result will have the same sign as b (so that when
            b is divided by it, the result comes out positive).
            """
        while b:
            a, b = b, a % b
        return a

    def simplify_fraction(numer, denom):
        if denom == 0:
            return "Division by 0 - result undefined"

        # Remove greatest common divisor:
        common_divisor = gcd(numer, denom)
        (reduced_num, reduced_den) = (numer / common_divisor,
                                      denom / common_divisor)
        # Note that reduced_den > 0 as documented in the gcd function.

        if reduced_den == 1:
            return "%d/%d is simplified to %d" % (numer, denom, reduced_num)
        elif common_divisor == 1:
            return "%d/%d is already at its most simplified state" % (numer,
                                                                      denom)
        else:
            return reduced_num, reduced_den
            #return "%d/%d is simplified to %d/%d" % (numer, denom, reduced_num, reduced_den)

    def splitword(word):
        return [char for char in word]

    df = pd.read_excel(pd.ExcelFile(file), "Sheet1")

    letters = splitword(goktekst)

    letter_list = []
    for l in letters:
        locations = df[df == l].stack().index.tolist()
        letter_list.append(locations)

    # turn the number pairs into fractions
    factored_letter_list = letter_list  # note: an alias of letter_list, not a copy; combination2 below is built before these values are overwritten
    combination2 = [p for p in itertools.product(*letter_list)]
    for i in range(len(letter_list)):
        for j in range(len(letter_list[i])):
            factored_letter_list[i][j] = ((letter_list[i][j][0] + 1) /
                                          letter_list[i][j][1])

    combination = [p for p in itertools.product(*factored_letter_list)]
    ratio = teller / noemer
    n = 0
    print(f"Ratio = {ratio}")
    for i in range(len(combination)):
        # find and print the combinations that we've found
        sum = 0
        for j in range(len(combination[i])):
            sum += combination[i][j]
            if (
                    abs(sum - ratio) < match
            ):  # exact numbers rarely match, so a very small difference is good enough
                numbers1 = []
                numbers2 = []
                # make answer combinations
                for combo_counter in range(len(
                        combination2[i])):  # store tuple values in two lists
                    numbers1.append(combination2[i][combo_counter][0] + 1)
                    numbers2.append(combination2[i][combo_counter][1])
                    # print(combination2[i][combo_counter])
                # check answer
                # calculate the denominator (noemer)
                reconstructed_noemer = 1
                for k in range(len(numbers2)):
                    reconstructed_noemer *= numbers2[k]
                #print("Noemer is ", reconstructed_noemer)
                # calculate the numerator (teller)
                reconstructed_teller = 0
                for m in range(len(numbers1)):  # fresh loop variable: reusing i would clobber the enclosing loop's index
                    reconstructed_teller += (
                        (numbers1[m] * reconstructed_noemer) / numbers2[m])
                reconstructed_noemer2 = simplify_fraction(
                    reconstructed_teller, reconstructed_noemer)[1]
                if reconstructed_noemer2 == noemer:
                    n += 1
                    print(f"\n\nCombinatie nr. {n} gevonden :)")
                    print(f"Som is {sum}")
                    print(f"Verschil is {sum - ratio}")
                    for k in range(len(letters)):  # print out the answer
                        print(
                            f"Letter {letters[k]} staat in vraag {numbers1[k]} van 20{numbers2[k]}"
                        )

                    #print("Teller is", reconstructed_teller)
                    print(
                        f"Oorspronkelijke teller en noemer zijn: {teller} en {noemer}"
                    )
                    print(
                        f"De gevonden teller en noemer zijn:     {simplify_fraction(reconstructed_teller, reconstructed_noemer)}"
                    )
Example #11
0
import pandas as pd
import numpy as np

xl_file = pd.ExcelFile(
    "D:/jakubicek/Rot_detection/data/training/labels_bin.xlsx")

data = pd.read_excel(xl_file, header=None)

file_names = data[0].values.tolist()

labels = data.loc[:, 1:7].to_numpy()
Example #12
0
# Handle excel files with pandas
import pandas as pd
# Read .xlsx file using pandas
excFile = pd.ExcelFile('InputSheets.xlsx')
# Export 0_Deaths_1962_2016 as 0_Deaths_1962_2016.csv
df0 = excFile.parse('0_Deaths_1962_2016')
df0.to_csv('0_Deaths_1962_2016.csv', index=False, encoding='utf-8')
# Export sheet 1_Yearly_AMT as 1_Yearly_AMT.csv
df0 = excFile.parse('1_Yearly_AMT')
df0.to_csv('1_Yearly_AMT.csv', index=False, encoding='utf-8')
# Export sheet 2_EstCO2_AMT as 2_EstCO2_AMT.csv
df0 = excFile.parse('2_EstCO2_AMT')
df0.to_csv('2_EstCO2_AMT.csv', index=False, encoding='utf-8')
# Export sheet 3_Attrib_Year_AMT as 3_Attrib_Year_AMT.csv
df0 = excFile.parse('3_Attrib_Year_AMT')
df0.to_csv('3_Attrib_Year_AMT.csv', index=False, encoding='utf-8')
# Export 4_Prod_Year_AMT as 4_Prod_Year_AMT.csv
df0 = excFile.parse('4_Prod_Year_AMT')
df0.to_csv('4_Prod_Year_AMT.csv', index=False, encoding='utf-8')
# Export sheet 5_Prod_Transm_AMT as 5_Prod_Transm_AMT.csv
df0 = excFile.parse('5_Prod_Transm_AMT')
df0.to_csv('5_Prod_Transm_AMT.csv', index=False, encoding='utf-8')
# Export sheet 6_Prod_Cars as 6_Prod_Cars.csv
df0 = excFile.parse('6_Prod_Cars')
df0.to_csv('6_Prod_Cars.csv', index=False, encoding='utf-8')
# Export sheet 7_Prod_Truck as 7_Prod_Truck.csv
df0 = excFile.parse('7_Prod_Truck')
df0.to_csv('7_Prod_Truck.csv', index=False, encoding='utf-8')
# Export sheet 8_CO2Em_2012 as 8_CO2Em_2012.csv
df0 = excFile.parse('8_CO2Em_2012')
df0.to_csv('8_CO2Em_2012.csv', index=False, encoding='utf-8')
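
# The same export written as a loop (a sketch: it assumes the sheet names listed above
# are exactly the workbook's sheets, so iterating sheet_names covers them all):
for sheet in excFile.sheet_names:
    excFile.parse(sheet).to_csv(sheet + '.csv', index=False, encoding='utf-8')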
Example #13
0
def generateFavorites(request):
    # Look up the admin's current group
    id_group = get_id_group(request.user.id)['GROUP_ID'][0]
    # Look up the user ids of the administered group
    ids_users = str(
        get_id_users_by_idGroup(id_group)['USER_ID'].tolist()).replace(
            '[', '(').replace(']', ')')
    log = get_register_byListIDs(ids_users)
    folder = 'static/profitability/update_presupuesto'

    # Consolidate the "Nuevos" parameters and disbursements
    parametrosNvUnificados = pd.DataFrame()
    desembolsosNvUnificados = pd.DataFrame()
    identificadorUnico = 1000
    fileNames = ''
    for i, row in log.iterrows():
        fileNames = fileNames + '||' + str(row['FILE_INPUT'])
        Newfile = folder + '/' + row['FILE_INPUT']
        Newpath = os.path.join(os.path.dirname(os.path.dirname(__file__)),
                               '../' + Newfile)
        Newxlsx_inputs = pd.ExcelFile(Newpath)
        parametros_nv = pd.DataFrame(
            pd.read_excel(Newxlsx_inputs, sheet_name='ParametrosNv'))
        parametros_nv = parametros_nv.dropna(
            subset=['Id_Tool'])  # drop records whose Id_Tool is null
        parametrosNvUnificados = pd.concat(
            [parametrosNvUnificados, parametros_nv], sort=False)
        desembolsos_nv = pd.DataFrame(
            pd.read_excel(Newxlsx_inputs, sheet_name='DesembolsosNv'))
        desembolsos_nv = desembolsos_nv.dropna(
            subset=['Id_Tool'])  # drop records whose Id_Tool is null
        desembolsosNvUnificados = pd.concat(
            [desembolsosNvUnificados, desembolsos_nv], sort=False)
        # Add a unique identifier to Id_Tool
        parametrosNvUnificados[
            'Id_Tool'] = parametrosNvUnificados['Id_Tool'] + identificadorUnico
        desembolsosNvUnificados['Id_Tool'] = desembolsosNvUnificados[
            'Id_Tool'] + identificadorUnico
        identificadorUnico = identificadorUnico + 1000

    parametrosNvUnificados = parametrosNvUnificados.fillna(0).reset_index(
        drop=True)
    desembolsosNvUnificados = desembolsosNvUnificados.fillna(0).reset_index(
        drop=True)

    filename = log['FILE_INPUT'][0]
    file = folder + '/' + filename
    path = os.path.join(os.path.dirname(os.path.dirname(__file__)),
                        '../' + file)
    xlsx_inputs = pd.ExcelFile(path)

    registro, insert_id = create_register(request, fileNames, is_union=1)
    # Save the record (event log)
    if registro < 1:
        status = 0
        message = 'ERROR: Se presento un error al intentar guardar el intento (Log de eventos).'
        file_return = ''
    else:
        try:
            total_time, file_output = get_data(
                file,
                parametrosNvUnificados=parametrosNvUnificados,
                desembolsosNvUnificados=desembolsosNvUnificados)
            status = 1
            message = 'Presupuesto generado con éxito.'
            file_return = file_output
            update_register(insert_id=insert_id,
                            file_output=file_output,
                            success=1,
                            total_time=total_time,
                            error='')
        except Exception:
            status = 0
            error = traceback.format_exc()
            message = 'ERROR:<br>Se presento un error al momento de generar el presupuesto, por favor contacte con el administrador.'
            file_return = ''
            update_register(insert_id=insert_id,
                            file_output='Error al generar archivo',
                            success=0,
                            total_time=0,
                            error=error)

    data = {"status": status, "message": message, "file_return": file_return}
    return JsonResponse(data, safe=False)
Example #14
0
def create_charts(path):
    #read the latest file
    latest_file = sorted(glob.iglob(path + '/*'), key=os.path.getmtime)[-1]

    df_data = pd.DataFrame()
    #get date based on file name
    date = latest_file[len(path):-4]
    status_file = pd.ExcelFile(latest_file)
    stat_file = pd.read_csv('processing/data/IPOstatus/stat.csv')

    #status data
    for sheet in status_file.sheet_names[1:]:
        data = status_file.parse(sheet, header=[2], index_col=0, skipfooter=1)
        new_columns = [
            data.columns[i - 1] +
            "二" if data.columns[i].find("Unnamed") >= 0 else data.columns[i]
            for i in range(len(data.columns))
        ]
        data.columns = new_columns
        data['date'] = date
        data['板块'] = sheet
        df_data = df_data.append(data, ignore_index=True)
    province = transform(df_data['注册地'].tolist())['省']
    df_data['省'] = [x[:-1] if len(x) == 3 else x[0:2] for x in province.values]
    df_data.replace('', np.nan, inplace=True)
    df_data['省'].fillna(df_data['注册地'], inplace=True)
    # print(df_data['省'].value_counts().tolist())
    # print(df_data['省'].value_counts().index.tolist())

    #stat data
    #stat_file.drop(columns='waiting',inplace=True)
    #stat_file.rename(columns={"date": "日期", "total": "受理企业总数","passed":"已过会","queue":"待审企业","failed":"中止审查企业"},inplace = True)
    latest_stat = stat_file.iloc[-1]
    date_stat = stat_file['date']
    total_stat = stat_file['total']
    diff_stat = stat_file['total'] - stat_file['total'].shift(1)
    passed_stat = list(stat_file['passed'])
    queue_stat = list(stat_file['queue'])
    failed_stat = list(stat_file['failed'])

    ##################################################################################
    page = Page()

    style = Style(width=1100, height=600)
    value = df_data['省'].value_counts().tolist()
    attr = df_data['省'].value_counts().index.tolist()
    data = [(name, val) for (name, val) in zip(attr, value)]
    chart = Map("IPO申报企业分布图", "摸鱼科技", title_pos='center', **style.init_style)
    chart.add(
        "",
        attr,
        value,
        maptype='china',
        is_visualmap=True,
        is_label_show=True,
        visual_text_color='#000',
        tooltip_formatter=geo_formatter,  # key point: pass the formatter function itself as the argument
        label_emphasis_textsize=15,
        label_emphasis_pos='right',
    )
    page.add(chart)

    #
    bar_diff = Bar("")
    bar_diff.add("受理企业总数", date_stat, total_stat)
    bar_diff.add("增长(减少)企业数", date_stat, diff_stat, legend_pos="15%")

    bar_stat = Bar("申报企业情况", "摸鱼科技")
    bar_stat.add("已过会", date_stat, passed_stat, is_stack=True)
    bar_stat.add("待审企业", date_stat, queue_stat, is_stack=True)
    bar_stat.add("中止审查企业",
                 date_stat,
                 failed_stat,
                 is_stack=True,
                 legend_pos="60%")

    chart = Grid(width=WIDTH)
    chart.add(bar_stat, grid_left="60%")
    chart.add(bar_diff, grid_right="60%")
    page.add(chart)

    #
    v1 = df_data['所属行业'].value_counts().tolist()
    attr = df_data['所属行业'].value_counts().index.tolist()
    pie = Pie("所属行业分布", "摸鱼科技", title_pos="center", **style.init_style)
    pie.add("",
            attr,
            v1,
            radius=[45, 55],
            center=[50, 50],
            legend_pos="85%",
            legend_orient='vertical')
    page.add(pie)

    #
    total_counts = df_data['板块'].count()
    chart = Pie('申报企业所占板块的比例',
                "申报企业总数: " + str(total_counts),
                title_pos='center',
                **style.init_style)
    for exchange, counts, position in zip(df_data['板块'].unique(),
                                          df_data['板块'].value_counts(),
                                          range(1, 4)):
        chart.add("", [exchange, ""], [counts, total_counts - counts],
                  center=[25 * position, 30],
                  radius=[28, 34],
                  label_pos='center',
                  is_label_show=True,
                  label_text_color=None,
                  legend_top="center")
    page.add(chart)

    #
    attr1 = [
        attr.replace("(特殊普通合伙)", "").replace('(特殊普通合伙)',
                                             '').replace('(特殊普通合伙)', '')
        for attr in df_data['会计师事务所'].unique().tolist()
    ]
    attr2 = df_data['保荐机构'].unique().tolist()
    v1 = df_data['会计师事务所'].value_counts().tolist()
    v2 = df_data['保荐机构'].value_counts().tolist()
    #chart_accountants
    chart_accountants = Bar("会计师事务所 - 统计图",
                            "摸鱼科技",
                            title_pos="center",
                            **style.init_style)
    chart_accountants.add("会计师事务所",
                          attr1,
                          v1,
                          legend_pos="75%",
                          mark_point=["max", "min"],
                          is_datazoom_show=True,
                          datazoom_range=[0, 40],
                          datazoom_type='both',
                          xaxis_interval=0,
                          xaxis_rotate=30,
                          yaxis_rotate=30)
    chart = Grid(width=WIDTH)
    chart.add(chart_accountants, grid_bottom="30%")
    page.add(chart)
    #chart_sponsor
    chart_sponsor = Bar("保荐机构 - 统计图",
                        "摸鱼科技",
                        title_pos="center",
                        **style.init_style)
    chart_sponsor.add("保荐机构",
                      attr2,
                      v2,
                      legend_pos="75%",
                      mark_point=["max", "min"],
                      is_datazoom_show=True,
                      datazoom_range=[0, 40],
                      datazoom_type='both',
                      xaxis_interval=0,
                      xaxis_rotate=30,
                      yaxis_rotate=30,
                      yaxis_margin=50)
    chart = Grid(width=WIDTH)
    chart.add(chart_sponsor, grid_bottom="30%")
    page.add(chart)

    return page
reader = csv.reader(f, dialect=my_dialect)  # bundles the delimiter= and other options set above into a single dialect

#6-1-4 JSON data
#6-1-5 XML and HTML
# conda install lxml   (shell command, not Python)
tables = pd.read_html()  # pass the path or URL of an HTML page that contains tables
len(tables)
#6-1-5-1 Reading XML with lxml.objectify
#6-2 Writing data out efficiently in binary formats (data only the computer can read)
df = pd.read_csv("hubble.csv")
df.to_pickle("hubble_pickle")  # write out as pickle data
pd.read_pickle("hubble_pickle")

#6-2-1 Using the HDF5 format
#6-2-2 Reading an Excel file
a = pd.ExcelFile("trip_information.xlsx")
b = pd.read_excel(a, "result")  # specify the sheet
writer = pd.ExcelWriter("trip_information.xlsx")
b.to_excel(writer, "Sheet2")  # write the DataFrame (not the ExcelFile) to the new sheet
writer.save()

#6-3 Fetching data via a web API
import requests
url = "https://api.github.com/repos/pandas-dev/pandas/issues"
resp = requests.get(url)
data = resp.json()
data[0]  # data is a list of dicts, roughly [{...}, {...}]
issues = pd.DataFrame(data, columns=["number", "title"])

#6-4 Fetching data from a database
import sqlite3
import os

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Using Plotly with Spyder
# https://community.plot.ly/t/plotly-for-spyder/10527/2
from plotly.offline import plot

# os.chdir(r'C:\Users\abibeka\OneDrive - Kittelson & Associates, Inc\Documents\HCM-CAV-Pooled-Fund\ExperimentalDesignArterial\Results')
# os.getcwd()
MainDir = r"C:\Users\abibeka\OneDrive - Kittelson & Associates, Inc\Documents\HCM-CAV-Pooled-Fund\Experimental Design Arterial\Results\Results Protected"

# Read once --- Takes time to load
VolTimeIntDat = pd.read_csv(os.path.join(MainDir, "VolumeTimeIntervalMap.csv"))
x1 = pd.ExcelFile(os.path.join(MainDir, "Results_MPR_Plotting_Exp.xlsx"))


def ReLab(x):
    MprLab = {
        "0PerMPR": "0",
        "20PerMPR": "20",
        "40PerMPR": "40",
        "60PerMPR": "60",
        "80PerMPR": "80",
        "100PerMPR": "100",
    }
    return MprLab[x]


def ReLab_Gap(x):
Example #17
0
    def __init__(self, subjectDir, dbLoc):
        self.location = subjectDir

        dicomDirDict = {}

        pbar = ProgressBar()
        for root, dirs, files in os.walk(self.location):
            dicoms = []
            for oneFile, i in zip(files, pbar(range(6000))):
                if re.search('(dcm|ima)', oneFile, re.IGNORECASE):
                    dicoms.append(os.path.join(root, oneFile))
            if not dicoms == []: dicomDirDict[root] = dicoms

        self.dicomDirs = dicomDirDict
        self.dirs = dicomDirDict.keys()
        self.allDicoms = reduce(lambda x, y: x + y, dicomDirDict.values())
        self.allDicomNum = len(self.allDicoms)
        self.dirDicomNum = [(x, len(y)) for (x, y) in dicomDirDict.iteritems()]
        self.firstDicom = self.allDicoms[0]
        self.modalityMapping = [modalityMapping(x) for x in self.dirs]
        self.modalityDicomNum = dict(
            zip(self.modalityMapping, [x[1] for x in self.dirDicomNum]))

        ds = dicom.read_file(self.firstDicom)
        self.age = re.search('^0(\d{2})Y', ds.PatientAge).group(1)
        self.dob = ds.PatientBirthDate
        self.id = ds.PatientID
        self.surname = ds.PatientName.split('^')[0]
        self.name = ds.PatientName.split('^')[1]
        try:
            self.fullname = ''.join([
                x[0].upper() + x[1:].lower() for x in [
                    self.surname,
                    self.name.split(' ')[0],
                    self.name.split(' ')[1]
                ]
            ])
            self.initial = self.surname[0] + ''.join(
                [x[0] for x in self.name.split(' ')])
        except:
            self.fullname = ''.join([
                x[0].upper() + x[1:].lower()
                for x in [self.surname, self.name]
            ])
            self.initial = self.surname[0] + self.name[0]

        self.sex = ds.PatientSex
        self.date = ds.StudyDate
        self.experimenter = getpass.getuser()

        print 'Now collecting information for'
        print '=============================='
        print '\n\t'.join([
            self.location, self.fullname, self.initial, self.id, self.dob,
            self.date, self.sex, ', '.join(self.modalityMapping),
            'by ' + self.experimenter
        ])
        print '=============================='

        self.koreanName = raw_input('Korean name  ? eg. 김민수: ')
        self.note = raw_input('Any note ? : ')
        self.group = raw_input('Group ? : ')
        self.numberForGroup = maxGroupNum(os.path.join(dbLoc, self.group))
        self.study = raw_input('Study name ? : ')
        self.timeline = raw_input(
            'baseline or follow up ? eg) baseline, 6mfu, 1yfu, 2yfu : '
        )  #bienseo: Solve unicode-error problems

        #bienseo: Classify timeline(baseline or follow up)

        if self.timeline != 'baseline':
            df = pd.ExcelFile(os.path.join(dbLoc, 'database',
                                           'database.xls')).parse(0)

            self.folderName = df.ix[(df.timeline == 'baseline') &
                                    (df.patientNumber == int(self.id)),
                                    'folderName'].values.tolist()[0]
            #bienseo: Show back up folder name
            print '\n\n       Now Back up to       ' + self.folderName + '\n\n'
            self.targetDir = os.path.join(
                dbLoc, self.group, self.folderName,
                self.timeline)  #bienseo: baseline, followUp, 1yfu ...
        else:
            self.folderName = self.group + self.numberForGroup + '_' + self.initial
            self.targetDir = os.path.join(dbLoc, self.group, self.folderName,
                                          self.timeline)
import pandas as pd
import xlrd
import re

d = pd.ExcelFile('\Find No of Ctns.xlsx')
df = d.parse('sheet1', skiprows=0)
for a in range(df.shape[0]):
    if not isinstance(df['Description'][a], basestring):
        df = df.drop([a])
df = df.reset_index()
del df['index']

Q = []
for a in df.Qty:
    aa = re.search('\d+', a)
    if aa:
        Q.append(aa.group(0))

des = []
for b in df.Qty:
    bb = re.search('\D+', b)
    if bb:
        des.append(bb.group(0))

u = []
for c in df.Description:
    cc = re.search('(\d+)(BOTTLE)', c)
    ccc = re.search('(\d+)[X]', c)
    if cc:
        u.append(cc.group(1))
    elif ccc:
Example #19
0
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals
from scipy import sparse
import math
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import ExcelFile

Path_output = 'C:/Users/Admin/Desktop/output.xlsx'
Path_input = 'C:/Users/Admin/Desktop/input.xlsx'
Input = pd.ExcelFile(Path_input)
inputData = pd.read_excel(Input, 0, header=None)
Output = pd.ExcelFile(Path_output)
outputData = pd.read_excel(Output, 0, header=None)
Y_out = outputData.values.T[0]
X = inputData[0:27].values
X = X.T


def softmax(V):
    e_V = np.exp(V - np.max(V, axis=0, keepdims=True))
    Z = e_V / e_V.sum(axis=0)
    return Z


def convert_labels(y, C=3):
    Y = sparse.coo_matrix((np.ones_like(y), (y, np.arange(len(y)))),
                          shape=(C, len(y))).toarray()
    return Y
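

# Quick illustrative check (not from the original script): convert_labels turns class
# ids 0..C-1 into one-hot columns of a C x N matrix.
print(convert_labels(np.array([0, 2, 1]), C=3))
# [[1 0 0]
#  [0 0 1]
#  [0 1 0]]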
# %%
"""
Created on Tue Sep  6 00:08:31 2016

@author: ajoshi
"""
from dfsio import readdfs, writedfs
from nilearn import image
import scipy as sp
import pandas as pd
import matlab.engine as meng
from shutil import copyfile

xl = pd.ExcelFile("/big_disk/ajoshi/coding_ground/hybridatlas/hybrid_atlas\
_adjusted_labels_13June2017.xlsx")

df = xl.parse("Sheet1")

oldID = df['oldID']
newID = df['newID']

left_mid = readdfs('/big_disk/ajoshi/coding_ground/hybridatlas/\
USCBrain_06_17_2017/BCI-DNI_brain.left.mid.cortex.dfs')

right_mid = readdfs('/big_disk/ajoshi/coding_ground/hybridatlas/\
USCBrain_06_17_2017/BCI-DNI_brain.right.mid.cortex.dfs')

v_lab = image.load_img('/big_disk/ajoshi/coding_ground/hybridatlas/\
USCBrain_06_17_2017/BCI-DNI_brain.label.nii.gz')

data1 = v_lab.get_data()
Example #21
0
import math
import pandas as pd


def media(lista):
    sum = 0.0
    suf = 0.0
    for i in range(0, len(lista)):
        sum = sum + lista[i]

    return sum / len(lista)


def varianza(lista):
    sum = 0.0
    for i in range(0, len(lista)):
        sum = sum + math.pow((lista[i] - media(lista)), 2)
    return sum / (len(lista))
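

# Equivalent sketch (the helper name is illustrative): compute the mean once instead of
# once per element, giving the same population variance as varianza above.
def varianza_rapida(lista):
    m = media(lista)
    return sum((x - m) ** 2 for x in lista) / len(lista)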


xls = pd.ExcelFile('DATOS2.xlsx')
print(xls.sheet_names)
df = xls.parse()

lf = []
lf = df.values.tolist()

#print(lf)

reales = []
simulados = []
for i in range(len(lf)):
    for j in range(len(lf[i])):
        if (j == 0):
            reales.append(lf[i][j])
Example #22
0
import xlrd
from colorama import Fore, Style

import matplotlib.pyplot as plt
import pandas as pd

# =============================================================================
# # ===========================================================================
# # #--------------------------------------------------------------------------
# # #                Panda Read in CMR Data from Excel File
# # #
# # #--------------------------------------------------------------------------
# # ===========================================================================
# =============================================================================

# read Excel (2003) File:
data_xls = pd.ExcelFile('CMR.xls')

data = data_xls.parse('Sheet1', na_values=['NA'])

# =============================================================================
# # ===========================================================================
# # #--------------------------------------------------------------------------
# # #     Plot to select Depth level to assess the Petrophysical Properties
# # #       at that level
# # #--------------------------------------------------------------------------
# # ===========================================================================
# =============================================================================

fig = plt.figure(figsize=(6,
                          18))  # manually adjust x,y dimension of plot canvas
fig.suptitle('Select Depth to Estimate Pc and Thin Section',
a longer time period allows one to obtain the bias in the signal arising from periods of
low water level; a larger backlog or interval reduces the number of valid points
'''

station = 'WestBay_Stn1_'

print(inspect.getfile(inspect.currentframe())) 
print(os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))))

print('begin processing')

if process:   
    excelFile = 'data/LagunaLake/water_quality/Water_Quality_data_LLDA_v2_1999_2016.xlsx'
    
    datFile = pd.ExcelFile(excelFile)
    sheets = datFile.sheet_names
    dft = pd.DataFrame()
    
    # collect all locations
    for sheet in sheets:
        if 'Figures' not in sheet and 'PrimProd' not in sheet:
            dff = pd.read_excel(excelFile, sheet_name=sheet)
            dff = dff[['Salinity (Chloride)','Date']]
            dff.rename(columns = {'Salinity (Chloride)' : 'Salinity ' + sheet}, inplace = True)
            dff.rename(columns = {'Date' : 'date'}, inplace = True)
            dff.set_index('date',inplace = True)
            dff.replace('-',np.nan, inplace = True)
            dff = dff[~dff.index.duplicated(keep='first')]
            dft = pd.concat([dff,dft], axis = 1, ignore_index = False)
    
def parseXL(self, xlfile):

    try:
        xl = pd.ExcelFile(xlfile)
        tmpdf = xl.parse(xl.sheet_names[0])

        # get the indices for the rows where the L1 headers are present
        data_l1 = tmpdf.index[tmpdf[tmpdf.isin(
            DATA_L1_HDR_KEYS)].notnull().any(axis=1)].tolist()
        # get indices for rows where the L2 headers are present
        # these will indicate the beginning of data
        data_l2_begin = tmpdf.index[tmpdf[tmpdf.isin(
            DATA_L2_HDR_KEYS)].notnull().any(axis=1)].tolist()
        # get indices for the rows where the misc headers are present
        # these will indicate the end of data
        data_l2_end = tmpdf.index[tmpdf[tmpdf.isin(
            DATA_SUM1_KEYS)].notnull().any(axis=1)].tolist()
        # get indices for any other keys that are part of data
        data_other = tmpdf.index[tmpdf[tmpdf.isin(
            DATA_SUM2_KEYS)].notnull().any(axis=1)].tolist()

        # generate indices of non-data rows
        metadata_idx = list(range(0, data_l1[0]))
        n = len(DATA_L1_HDR_KEYS)

        # TODO: malformed files may have any of the keys missing resulting in
        # empty lists of indices

        for i in range(0, n):
            metadata_idx += list(range(data_l1[i] + 1, data_l2_begin[i]))
            if i < n - 1:
                metadata_idx += list(range(data_l2_end[i] + 1, data_l1[i + 1]))

        metadata_idx += list(range(data_l2_end[n - 1] + 1, data_other[0]))
        metadata_idx += list(range(data_other[-1] + 1, tmpdf.shape[0]))

        # copy metadata rows to its own dataframe
        tmpdf_md = tmpdf.loc[metadata_idx, :]
        # clean-up
        tmpdf_md.dropna(axis=1, how='all', inplace=True)
        tmpdf_md.dropna(axis=0, how='all', inplace=True)

        # purge metadata rows (copied above) from the data df
        tmpdf.drop(metadata_idx, inplace=True)
        # clean-up
        tmpdf.dropna(axis=1, how='all', inplace=True)
        tmpdf.dropna(axis=0, how='all', inplace=True)

        # collect l1 header names
        # needed because we don't know the order in which the l1 headers occur in data
        df_l1 = tmpdf.loc[data_l1]
        df_l1 = df_l1.loc[:, df_l1.notnull().any()]
        l1_hdrs = df_l1.T.unstack().tolist()
        l1_hdrs = [s for s in l1_hdrs if str(s) != 'nan']

        # drop all l1 headers
        # we will be using a single-level index for the final df
        # l1 headers will be used to populate a categorical var instead
        tmpdf.drop(data_l1, inplace=True)

        # create a new dataframe for each school type
        df_list = []
        for i in range(0, n):

            row_idx = list(range(data_l2_begin[i] + 1, data_l2_end[i] + 1))
            col_idx = data_l2_begin[i]
            school_type = l1_hdrs[i]

            df_list.append(extract_df(tmpdf, row_idx, col_idx, school_type))

            # if this the last of the school types we need to append
            # the aggregated lea rows. we do this as a separate df containing
            # data_other rows.
            if (i == n - 1):
                row_idx = data_other
                df_list.append(extract_df(tmpdf, row_idx, col_idx, np.nan))

        # we have a df with all data for all school types including aggregated
        # rows at this point
        df_full = pd.concat(df_list, axis=0, ignore_index=True)

        # recode column names
        df_full.rename(columns=DATA_L2_HDR_DICT, inplace=True)
        # recode school_type
        df_full['school_type'] = df_full['school_type'].map(DATA_L1_HDR_DICT)
        # recode other fields
        cond = df_full['index'].isin(data_l2_end + data_other)
        df_full.loc[cond, 'school_name'] = df_full[cond]['school_code'].map(
            DATA_SUM_DICT)
        df_full.loc[cond, 'school_code'] = INVALID_SCHOOL_CODE
        cond = df_full['index'].isin(data_other)
        df_full.loc[cond, 'school_type'] = ALL_SCHOOL_TYPE

        df_full.drop(['index'], axis=1, inplace=True)
        # re-arrange cols to original order
        df_full = df_full[list(DATA_L2_HDR_DICT.values()) + ['school_type']]

        #
        # METADATA
        #

        # add appropriate prefix and suffix to metadata keys
        md_keys = ['   ' + s + ':' for s in METADATA_KEYS]
        # get indices for rows where the metadata keywords are present
        md_idx = tmpdf_md.index[tmpdf_md[tmpdf_md.isin(md_keys)].notnull().any(
            axis=1)].tolist()

        # extract non-null cols only for those rows containing metadata keys
        tmpdf_md = tmpdf_md.loc[md_idx, :]
        tmpdf_md.dropna(axis=1, how='all', inplace=True)
        tmpdf_md.dropna(axis=0, how='all', inplace=True)
        tmpdf_md.columns = list(range(0, tmpdf_md.shape[1]))

        # extract metadata keys
        md_keys = list(tmpdf_md.loc[:, METADATA_KEY_COLS].unstack().values)
        md_keys = list(map(str.strip, md_keys))
        md_keys = list(map(str.lower, md_keys))
        md_keys = [s.replace(' ', '_') for s in md_keys]
        md_keys = [s[:-1] for s in md_keys]

        # extract metadata values
        md_vals = list(tmpdf_md.loc[:, METADATA_VAL_COLS].unstack().values)
        md_vals = [s.lower() if isinstance(s, str) else s for s in md_vals]

        md_dict = dict(zip(md_keys, md_vals))

        # store only at the end when we have successfully completed all steps
        # for both data and metadata
        self.d_df = df_full
        self.md_dict = md_dict

    except Exception as e:
        raise e
import numpy as np
import pandas as pd
from openpyxl import load_workbook


def pandas():
    ### PANDAS
    # Import desired inventory file and save as a variable
    file = 'Grocery.xlsx'
    xl = pd.ExcelFile(file)

    # Display all sheets that exist in spreadsheet file
    print(xl.sheet_names)

    # Generate DataFrame from imported spreadsheet and print
    df1 = xl.parse('Page 1')
    print(df1)
    shoplist = ["" for x in range(1)]

    item = "Start"
    while item != "Done":
        item = raw_input("Add item or type Done: ")
        #except ValueError:
        #print("Sorry, invalid input. Try adding another")
        if item != "Done":
            if df1['Item'].str.match(item).any():
                print(item)
                for j in range(len(shoplist)):
                    if shoplist[j] == "":
                        shoplist[j] = item
                        shoplist.append("")
                        print(shoplist)
                        break
            else:
                print("Item not found. Please add a new Item.")
        else:
            break
    # Use to test location of specific item-->> print(df1.loc[df1['Item'] == 'Nuts'])

    # Find row in which desired item exists
    for p in range(len(shoplist) - 1):
        #  rowselect = (df1.index[df1['Item'] == shoplist[p]])
        rowselect = (df1.index[df1['Item'].str.match(shoplist[p])])
        row4search = rowselect + 1  #Variable used to search for numerical information in sheet
        #Display int64 values
        #print(rowselect)
        #print(row4search)
        #Display parsed row of item info
        #print(df1.loc[rowselect])

        wb = load_workbook('Grocery.xlsx')  #Loads spreadsheet
        #sheet_1 = wb.get_sheet_by_name('Page 1') #Extracts desired sheet
        sheet_1 = wb['Page 1']  #Extracts desired sheet - this is better than ^

        code = np.zeros(sheet_1.max_row
                        )  #Create blank array based on size of inventory list
        cost = np.zeros(sheet_1.max_row)
        aisle = np.zeros(sheet_1.max_row)
        bay = np.zeros(sheet_1.max_row)

        for i in range(1, sheet_1.max_row):
            code[i] = sheet_1.cell(
                row=i + 1,
                column=2).value  #Fill in data to blank arrays from spreadsheet
            cost[i] = sheet_1.cell(row=i + 1, column=3).value
            aisle[i] = sheet_1.cell(row=i + 1, column=4).value
            bay[i] = sheet_1.cell(row=i + 1, column=5).value

    # Full info display
        print(shoplist[p])
        print('Barcode', code[row4search])  #Display desired item barcode
        print('Price', cost[row4search])  #Display desired item cost
        print('Aisle', aisle[row4search])  #Display desired item isle
        print('Bay', bay[row4search])  #Display desired item bay
Example #26
0
                                            update_start_date)

# compute epidemic metrics - compute metrics associated with time-varying data, writing the database for each date
computing_metrics = True
delta = timedelta(days=1)
current_date = update_start_date
g = []
graph_exists = False

while computing_metrics:

    print("checking epidem_data_" + str(current_date) + ".xlsx")

    try:
        # check if metrics were already computed for current_date
        DB_raw = pd.ExcelFile("data/data_repo/epidem_data_" +
                              str(current_date) + ".xlsx")

    except FileNotFoundError:

        if (current_date < update_end_date):

            print("new epidemic data avalilable, but not processed...UPDATE!")
            print("computing epidemic data for " + str(current_date))

            # initialize data dict
            epidem_data = dict([])
            epidem_data["Name"] = [name for name in constant_db["Name"]]
            epidem_data["Code"] = [code for code in constant_db["Code"]]
            epidem_data["latitude"] = [longi for longi in constant_db["long"]]
            epidem_data["longitude"] = [lat for lat in constant_db["lat"]]
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 11 01:28:11 2017

@author: Shabaka
"""

# Import pandas
import pandas as pd

# Assign spreadsheet filename: file
file = 'battledeath.xlsx'

# Load spreadsheet: xl
xl = pd.ExcelFile(file)

# Print sheet names
print(xl.sheet_names)
"""
Import Excel Sheets Specifically
"""

# Load a sheet into a DataFrame by name: df1
df1 = xl.parse('2004')

# Print the head of the DataFrame df1
print(df1.head())

# Load a sheet into a DataFrame by index: df2
df2 = xl.parse(0)
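
# Related sketch (not in the original tutorial): passing sheet_name=None loads every
# sheet at once into a dict of DataFrames keyed by sheet name.
all_sheets = pd.read_excel(file, sheet_name=None)
print(list(all_sheets.keys()))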
            sheet1.write(i, 0, f)
            sheet1.write(i, 1, count / 60.0)
            if (count / 60.0 > 0):
                sheet1.write(i, 1, 1)
            else:
                sheet1.write(i, 1, 0)
        image_list = []
        wb.save('Pixel_Density_' + k[0:1] + "_" + k[2:3] + '.xls')
x_list = []
for g in glob.glob("*.xls"):
    x_list.append(g)
x_list.sort()
print x_list
kkk = 0
for z in x_list:
    xls_file = pd.ExcelFile(z)
    df = xls_file.parse('Sheet 1')
    df = df.sort_values(by='IMAGE NAME')
    df = df.drop('IMAGE NAME', 1)
    df = df.T
    df = df.reset_index(drop=True)
    df.to_csv('file' + str(kkk) + '.csv')
    df = read_csv('file' + str(kkk) + '.csv')
    first_column = df.columns[0]
    # Delete first
    df = df.drop([first_column], axis=1)
    df.to_csv('file' + str(kkk) + '.csv', index=False)
    df = read_csv('file' + str(kkk) + '.csv')
    first_column = df.columns[64]
    # Delete first
    df = df.drop([first_column], axis=1)
Example #29
0
# Concatenate excel (.xlsx) files
import pandas as pd
import os

# filenames
excel_names = [f for f in os.listdir() if f.endswith(('.xlsx', '.xls'))]

# read them in
excels = [pd.ExcelFile(name) for name in excel_names]

# turn them into dataframes
frames = [
    x.parse(x.sheet_names[0], header=None, index_col=None) for x in excels
]

# delete the first row for all frames except the first
# i.e. remove the header row -- assumes it's the first
frames[1:] = [df[1:] for df in frames[1:]]

# concatenate them..
combined = pd.concat(frames)

# write it out
combined.to_excel("compiled.xlsx", header=False, index=False)
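
# Equivalent sketch using header=0, letting pandas drop each file's header row itself
# (assumes, as above, that every workbook's first sheet has a single header row):
frames_hdr = [pd.read_excel(name, sheet_name=0, header=0) for name in excel_names]
combined_hdr = pd.concat(frames_hdr, ignore_index=True)
print(combined_hdr.shape)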
Example #30
0
import pandas
import random

path1 = r"FFL2 Data.xlsx"
workbook = pandas.ExcelFile(path1)
encounters = workbook.parse("Encounters")
INCREASE_RATE = 5


def rollGroupSize(text):
    roll_range = text.split("-")
    if len(roll_range) > 1:
        return random.randint(int(roll_range[0]), int(roll_range[1]))
    else:
        return 1
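
# Quick illustrative check (not part of the original script): a "2-5" entry rolls a
# group size between 2 and 5, while a plain entry yields a group of 1.
print(rollGroupSize("2-5"), rollGroupSize("1"))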


gen = "y"
while gen != "n":
    zone = input("Generate encounter for which zone: ")
    roll = random.randint(1, 256)
    group1 = ""
    group2 = ""
    group3 = ""
    num1 = 0
    num2 = 0
    num3 = 0

    zone_table = encounters.loc[encounters["ZONE"] == zone]
    for row in range(encounters.shape[0]):
        if zone_table.iloc[row, 2] >= roll: