Code Example #1
def diag_aggregates():

    years = ['2006', '2007', '2008', '2009']

    df_final = None
    for yr in years:
        xls = ExcelFile(fname_all)
        df = xls.parse(yr)  # parse the sheet for this year

        cols = [u"Mesure",
                u"Dépense \n(millions d'€)",
                u"Bénéficiaires \n(milliers)",
                u"Dépenses \nréelles \n(millions d'€)",
                u"Bénéficiaires \nréels \n(milliers)",
                u"Diff. relative \nDépenses",
                u"Diff. relative \nBénéficiaires"]
        selected_cols = [u"Mesure", u"Diff. relative \nDépenses", u"Diff. relative \nBénéficiaires"]
        df = df[selected_cols]
        df['year'] = yr
        df['num'] = range(len(df.index))
        df = df.set_index(['num', u'Mesure', 'year'])
        if df_final is None:
            df_final = df
        else:

            df_final = df_final.append(df, ignore_index=False)

#    DataFrame.groupby()
    df_final = df_final.sortlevel(0)
    print str(fname_all)[:-5]+'_diag.xlsx'
    writer = ExcelWriter(str(fname_all)[:-5]+'_diag.xlsx')
    df_final.to_excel(writer, sheet_name="diagnostics", float_format="%.2f")
    writer.save()
Code Example #2
class EXCEL:
    def __init__(self, xls_filepath):
        self.filepath = xls_filepath
        self._data = None  # filled in by select_sheet() / merge_sheet()
        self.xls_reader = ExcelFile(xls_filepath)
        self.sheet_names = self.xls_reader.sheet_names
        if len(self.sheet_names) == 1:
            self.select_sheet(self.sheet_names[0])
        self.time = datetime.datetime.now()

    def add(self):
        pass

    @property
    def data(self):
        return self._data

    def select_sheet(self, sheet_name):
        self._data = self.xls_reader.parse(
            sheet_name)  #self._data = pd.read_excel(xls_filepath)

    def merge_sheet(self):
        sheets = []
        for sheet_name in self.sheet_names:
            sheet = self.xls_reader.parse(sheet_name)
            sheets.append(sheet)
        self._data = pd.concat(sheets)

    def save(self, xls_filepath, sheet_name='Sheet5'):
        self.xls_reader.close()
        self.xls_writer = ExcelWriter(xls_filepath)
        self._data.to_excel(self.xls_writer, sheet_name)
        self.xls_writer.save()
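A short usage sketch for the class above; the workbook and output names are hypothetical, and the imports mirror what the class itself relies on:

# Usage sketch for the EXCEL class above. 'report.xlsx' and 'merged.xlsx'
# are hypothetical file names.
import datetime
import pandas as pd
from pandas import ExcelFile, ExcelWriter

book = EXCEL('report.xlsx')
print(book.sheet_names)                     # sheets discovered on open
book.select_sheet(book.sheet_names[0])      # load a single sheet into .data
book.merge_sheet()                          # or concatenate every sheet
book.save('merged.xlsx', sheet_name='all')  # write .data back out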
Code Example #3
File: nordpool.py Project: espenfb/SmartTS_Lab
def read_excel(fname, header=None):
    """Read excel into dict.
    Args:
        fname: name of excel file
        header: the Finland files do not have a header
    Output:
        dictionary containing the data
    """
    xls = ExcelFile(fname)
    if header:
        parse_cols = [1]
    else:
        parse_cols = None

    df = xls.parse(xls.sheet_names[0], skiprows=1, parse_cols=parse_cols)

    # Fix keys: iterate over a copy of the keys since we mutate the dict
    for key in list(temp):
        new_key = key.replace(" - ", "_")
        if new_key != key:
            temp[new_key] = temp.pop(key)
    # Stupid hack for Finland
    if header:
        temp[header] = temp.pop(temp.keys()[0])

    return temp
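The header argument doubles as the column name to assign for the header-less Finland files; a usage sketch with hypothetical file names:

# Hypothetical file names. For the header-less Finland file, the 'header'
# value becomes the key of its single data column, per the hack above.
prices = read_excel('elspot_prices.xls')
finland = read_excel('fi_prices.xls', header='FI')
print(sorted(prices.keys()))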
Code Example #4
File: data_tools.py Project: PTB-M4D/GridSens
def network_UKGDS(filename,header=28):
	"""
	Load Excel file with UKGDS data format and build dict array of bus coordinates
	and graph structure suitable for plotting with the networkx module.
	"""
	from numpy import array,where
	from pandas import ExcelFile
	from networkx import Graph

	data = ExcelFile(filename)
	bus = data.parse("Buses",header=header)
	branch = data.parse("Branches",header=header)
	pos = {}
	for node in range(len(bus["BNU"])):
		pos.update({node:array([bus["BXC"][node],bus["BYC"][node]])})
	net = []
	for k in range(len(branch["CFB"])):
		von = where(bus["BNU"]==branch["CFB"][k])[0][0]
		zu  = where(bus["BNU"]==branch["CTB"][k])[0][0]
		net.append([von,zu])
	nodes = set([n1 for n1,n2 in net] + [n2 for n1,n2 in net])
	G = Graph()
	for node in nodes:
		G.add_node(node)
	for edge in net:
		G.add_edge(edge[0],edge[1])
	return G,pos
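The graph and coordinate dict returned above plug directly into networkx drawing; a minimal sketch, assuming a workbook in the UKGDS format (file name hypothetical):

# Sketch: draw the bus/branch network; 'ukgds.xls' is a hypothetical input.
import matplotlib.pyplot as plt
import networkx as nx

G, pos = network_UKGDS('ukgds.xls')
nx.draw(G, pos=pos, node_size=30)
plt.show()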
Code Example #5
File: xls_converter.py Project: suibh/mic-tac-toe
    def convert(self, file_bytes):
        """Accepts a bytes array and returns a json string """

        excel_file = None

        try:
            excel_file = ExcelFile(BytesIO(file_bytes))
        except Exception as e:
            log.error("Error reading in excel bytes, {}".format(e))

        if excel_file is None:
            return self._default_value()

        if self.sheet_reader.sheet_name not in excel_file.sheet_names:
            return self._default_value()

        try:
            df = excel_file.parse(self.sheet_reader.sheet_name)
            success, output = self.sheet_reader.read(df)

            if not success:
                return self._default_value()

            return True, json.dumps(output, sort_keys=True)
        except Exception as e:
            log.error("Error parsing file: {}".format(e))

        return self._default_value()
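A sketch of how convert() might be driven; the converter instance and file name are hypothetical, and sheet_reader is whatever reader the class was built with:

# Hypothetical driver: 'converter' is an instance of the class above and
# 'input.xlsx' a made-up path; convert() returns (True, json_str) on
# success and the class's default value otherwise.
with open('input.xlsx', 'rb') as fh:
    result = converter.convert(fh.read())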
Code Example #6
def uploadfile_store(request):

    if request.method == 'POST':

        try:
            filename = request.FILES['fileupload'].name
            filedata = request.FILES['fileupload'].read()

            file_extension = os.path.splitext(filename)[-1]

            if file_extension == ".xls" or file_extension == ".xlsx":
                excel_data = ExcelFile(StringIO.StringIO(filedata))
                df = excel_data.parse(excel_data.sheet_names[0], header=None, index_col=None, na_values="")
                df = df.fillna("")
                ht = df.to_html(header=True, index=True,
                                float_format=lambda x: '%10.2f' % x,
                                classes="table table-bordered table-striped draggable").encode('utf-8')

                return HttpResponse("""
                <html><head><script type="text/javascript">
                window.top.ClearUploadEisup();
                </script>
                <style>
                table {
                        border-collapse: collapse;
                        margin-left: 30px;
                }

                table, th, td {
                        border: 1px solid black;
                        font-family: Verdana, Arial, Helvetica, sans-serif;
                        font-size: 8pt;
                }
                </style>
                </head>%s</html>
                """ % ht)

            else:
                # "Формат файла не поддерживается!" = "File format not supported!"
                return HttpResponse("""
                <html><head><script type="text/javascript">
                    window.top.ClearUploadEisup();
                    alert("Формат файла не поддерживается!");
                </script></head></html>
                """)

        except Exception:
            return HttpResponse("""
            <html><head><script type="text/javascript">
            </script></head></html>
            """)
Code Example #7
File: frametime.py Project: fibn144/nipet
    def from_excel(self, excel_file, units):
        """Pulls timing info from excel file and stores in an array.
        Parameters
        ----------
            excel_file:
                the name of the file to import from. 
                e.g. file.xls
            units:
                the units the imported data is in
        """
        try:
            df = ExcelFile(excel_file).parse('Sheet1') #dataframe
            rec = df.to_records()

            # rec can be converted to a plain numpy array by casting every
            # field to the same dtype with rec.astype and then calling
            # .view(that dtype); supposedly faster than the method below

            dat_arr = np.array(rec.tolist()) #pirate

            #get rid of the 'index' column from pandas
            self.data = dat_arr[0:dat_arr.shape[0], 1:self.col_num + 1]
            self.units = units
        except IOError:
            print "Could not read excel file: %s" % excel_file
        try:
            self._validate_frames()
        except FrameError:
            raise DataError('Bad data', self.data, excel_file)
Code Example #8
def read_and_save(file_name, log_file):
    print(f"Reading {file_name }")
    xls = ExcelFile(file_name)
    data = xls.parse(xls.sheet_names[0])
    for row_index, row in data.iterrows():
        try:
            if 'OFFICE' in row.keys():
                branch = row['OFFICE']
            else:
                branch = row['BRANCH']
            if 'BANK NAME' in row.keys():
                bank_name = row['BANK NAME']
            else:
                bank_name = row['BANK']

            # checking if already there or not
            obj = BankDetail.objects.filter(ifsc_code=row['IFSC'])
            if obj:
                obj.update(branch_name=branch,
                           bank_name=bank_name,
                           branch_address=row['ADDRESS'])
            else:
                BankDetail(ifsc_code=row['IFSC'],
                           branch_name=branch,
                           bank_name=bank_name,
                           branch_address=row['ADDRESS']).save()

        except KeyError as e:
            log_file.write(file_name)
            print(e)
            print(
                f"Error while reading file {file_name.split(os.sep)[-1]}\nSkipping."
            )
            break
Code Example #9
File: core.py Project: zhiwentech/pyBankTransactions
def parse_trans_bonx(excel_file: pd.ExcelFile, tmp_trans_list_by_sheet) -> int:
    col_map = {
        '交易机构': '交易网点',
        '交易类型': '交易方式',
        '借贷标识': '借贷标志',
        '对方行名': '对方开户行',
        '对方名称': '对方户名',
    }
    tmp_line_num = 0
    for sheet in excel_file.sheet_names:
        tmp_acc_strs = excel_file.parse(sheet_name=sheet, header=None, nrows=4)
        if len(tmp_acc_strs) == 0:
            continue
        _name = tmp_acc_strs.iloc[1, 0].split(':')[1]
        _account = tmp_acc_strs.iloc[2, 0].split(':')[1]
        _card = tmp_acc_strs.iloc[3, 0].split(':')[1]
        tmp_trans_sheet = excel_file.parse(sheet_name=sheet,
                                           header=6,
                                           dtype=str)
        tmp_trans_sheet.columns = tmp_trans_sheet.columns.str.strip()
        tmp_trans_sheet.rename(columns=col_map, inplace=True)
        tmp_trans_sheet['户名'] = _name
        tmp_trans_sheet['账号'] = _account
        tmp_trans_sheet['卡号'] = _card
        tmp_trans_list_by_sheet.append(tmp_trans_sheet)
        tmp_line_num += len(tmp_trans_sheet)
    return tmp_line_num
Code Example #10
 def __init__(self, xls_filepath):
     self.filepath = xls_filepath
     self.xls_reader = ExcelFile(xls_filepath)
     self.sheet_names = self.xls_reader.sheet_names
     if len(self.sheet_names) == 1:
         self.select_sheet(self.sheet_names[0])
     self.time = datetime.datetime.now()
Code Example #11
def build_totals():
    h5_name = "../amounts.h5"
    store = HDFStore(h5_name)
    files = [
        'logement_tous_regime', 'openfisca_pfam_tous_regimes',
        'minima_sociaux_tous_regimes', 'IRPP_PPE', 'cotisations_RegimeGeneral'
    ]
    first = True
    for xlsfile in files:
        xls = ExcelFile(xlsfile + '.xlsx')
        df_a = xls.parse('amounts', na_values=['NA'])
        try:
            df_b = xls.parse('benef', na_values=['NA'])
        except Exception:
            df_b = DataFrame()

        if first:
            amounts_df = df_a
            benef_df = df_b
            first = False
        else:
            amounts_df = concat([amounts_df, df_a])
            benef_df = concat([benef_df, df_b])

    amounts_df, benef_df = amounts_df.set_index("var"), benef_df.set_index("var")
    print amounts_df.to_string()
    print benef_df.to_string()
    store['amounts'] = amounts_df
    store['benef'] = benef_df
    store.close()
Code Example #12
File: core.py Project: zhiwentech/pyBankTransactions
def parse_trans_pab(excel_file: pd.ExcelFile, tmp_trans_list_by_sheet) -> int:
    col_map = {
        '借方发生额': '交易金额',
        '交易对方户名': '对方户名',
        '交易对方账号': '对方账号',
        '交易对方行名称': '对方开户行',
    }
    tmp_line_num = 0
    for sheet in excel_file.sheet_names:
        tmp_acc_strs = excel_file.parse(sheet_name=sheet, header=None, nrows=5)
        tmp_acc_strs.dropna(how='all', axis=1, inplace=True)
        _name = tmp_acc_strs.iloc[1, 3]
        _account = tmp_acc_strs.iloc[1, 1]
        _card_num = tmp_acc_strs.iloc[2, 1]
        _currency = tmp_acc_strs.iloc[4, 3]
        tmp_trans_sheet = excel_file.parse(sheet_name=sheet,
                                           header=6,
                                           dtype=str,
                                           skipfooter=2)
        tmp_trans_sheet.rename(columns=col_map, inplace=True)
        tmp_trans_sheet['户名'] = _name
        tmp_trans_sheet['账号'] = _account
        tmp_trans_sheet['卡号'] = _card_num
        tmp_trans_sheet['币种'] = _currency
        tmp_trans_sheet['交易金额'] = tmp_trans_sheet['交易金额'].str.replace(',', '')
        tmp_trans_sheet['贷方发生额'] = tmp_trans_sheet['贷方发生额'].str.replace(
            ',', '')
        tmp_trans_list_by_sheet.append(tmp_trans_sheet)
        tmp_line_num += len(tmp_trans_sheet)
    return tmp_line_num
Code Example #13
    def build_lookup_table(data_model: FileDataModel, value_column,
                           label_column, workflow_spec_id, field_id):
        """ In some cases the lookup table can be very large.  This method will add all values to the database
         in a way that can be searched and returned via an api call - rather than sending the full set of
          options along with the form.  It will only open the file and process the options if something has
          changed.  """
        xls = ExcelFile(data_model.data)
        df = xls.parse(xls.sheet_names[0])  # currently we only look at the first sheet
        df = pd.DataFrame(df).replace({np.nan: None})
        if value_column not in df:
            raise ApiError(
                "invalid_enum",
                "The file %s does not contain a column named %s" %
                (data_model.file_model.name, value_column))
        if label_column not in df:
            raise ApiError(
                "invalid_enum",
                "The file %s does not contain a column named %s" %
                (data_model.file_model.name, label_column))

        lookup_model = LookupFileModel(workflow_spec_id=workflow_spec_id,
                                       field_id=field_id,
                                       file_data_model_id=data_model.id,
                                       is_ldap=False)

        db.session.add(lookup_model)
        for index, row in df.iterrows():
            lookup_data = LookupDataModel(lookup_file_model=lookup_model,
                                          value=row[value_column],
                                          label=row[label_column],
                                          data=row.to_dict(OrderedDict))
            db.session.add(lookup_data)
        db.session.commit()
        return lookup_model
Code Example #14
File: corpus.py Project: estnltk/pfe
def excel_to_corpus(excel_path, corpus_path):
    '''NB! Make sure to use .xls file extension for Excel files.'''
    corpus = PyCorpus(corpus_path)
    excel  = ExcelFile(excel_path)
    # as we do not know the number of sheets, we parse all of them
    # until we get an error
    idx = 0
    while True:
        try:
            df = excel.parse(str(idx))
            # recreate some information that was modified when exporting to xls
            new_df = dict()
            for col in df.columns:
                data = []
                for v in df[col]:
                    if type(v) == float and math.isnan(v):
                        data.append(None)
                    elif v == 0:
                        data.append(False)
                    elif v == 1:
                        data.append(True)
                    else:
                        data.append(v)
                new_df[col] = Series(data)
            corpus[str(idx)] = DataFrame(new_df)
        except xlrd.biffh.XLRDError:
            break
        idx += 1
    corpus.close()
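The loop above probes sheets named "0", "1", ... until xlrd raises; since ExcelFile already exposes sheet_names, the same traversal can also be written without the sentinel exception. A variant sketch (not the author's code):

# Variant sketch: iterate the known sheet names instead of parsing until
# xlrd.biffh.XLRDError signals the end.
from pandas import ExcelFile

def iter_sheets(excel_path):
    excel = ExcelFile(excel_path)
    for name in excel.sheet_names:
        yield name, excel.parse(name)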
Code Example #15
def build_totals():
    h5_name = "../amounts.h5"
    store = HDFStore(h5_name)

    files = ['logement_tous_regime', 'pfam_tous_regimes',
             'minima_sociaux_tous_regimes', 'IRPP_PPE', 'cotisations_TousRegimes' ]

    first = True
    for xlsfile in files:
        xls = ExcelFile(xlsfile + '.xlsx')
        print xlsfile + '.xlsx'  # show which workbook is being read
        df_a = xls.parse('amounts', na_values=['NA'])
        try:
            df_b = xls.parse('benef', na_values=['NA'])
        except Exception:
            df_b = DataFrame()

        if first:
            amounts_df = df_a
            benef_df = df_b
            first = False
        else:
            amounts_df = concat([amounts_df, df_a])
            benef_df = concat([benef_df, df_b])

    amounts_df, benef_df = amounts_df.set_index("var"), benef_df.set_index("var")
    print amounts_df.to_string()
    print benef_df.to_string()
    store['amounts'] = amounts_df
    store['benef'] = benef_df
    store.close()
Code Example #16
    def action_import_pricelists(self):
        for item in self:
            active_id = item._context['active_id']
            pricelists_obj = item.env[item._context['active_model']]
            product_obj = item.env['product.product']
            data = StringIO(item.binary_data.decode('base64'))
            xls = ExcelFile(data)
            data = xls.parse(xls.sheet_names[0])
            pricelists_dict = data.to_dict()

            for running in range(0, len(pricelists_dict['PID'])):
                pid = str(pricelists_dict['PID'][running]).zfill(7)
                price_inc_vat = pricelists_dict['Price (Inc. Vat)'][running]
                product_id = product_obj.search(
                    [('default_code', '=', pid)]).id

                if not product_id:
                    raise except_orm(_('PID does not exist: %r') % (pid, ))
                if not pid or not price_inc_vat:
                    raise except_orm(_('Some PID or Price have empty text.'))

                pricelists_obj.pricelists_line_ids.create({
                    'pricelists_id': active_id,
                    'product_id': product_id,
                    'price_inc_vat': price_inc_vat,
                })

        return {'type': 'ir.actions.act_window_close'}
Code Example #17
File: core.py Project: zhiwentech/pyBankTransactions
def parse_trans_psbc(excel_file: pd.ExcelFile, tmp_trans_list_by_sheet) -> int:
    col_map = {
        '交易渠道': '交易方式',
        '交易机构名称': '交易网点',
        '对方账号/卡号/汇票号': '对方账号',
        '对方开户机构': '对方开户行',
    }
    tmp_line_num = 0
    for sheet in excel_file.sheet_names:
        tmp_acc_strs = excel_file.parse(sheet_name=sheet, header=None, nrows=4)
        if len(tmp_acc_strs) == 0:
            continue
        _tmp_str = tmp_acc_strs.iloc[1, 0].split(':')
        _name = _tmp_str[2]
        _account = _tmp_str[1].split()[0]
        _currency = tmp_acc_strs.iloc[3, 0].split(':')[1].split()[0]
        tmp_trans_sheet = excel_file.parse(sheet_name=sheet,
                                           header=5,
                                           dtype=str,
                                           skipfooter=3)
        tmp_trans_sheet.columns = tmp_trans_sheet.columns.str.strip()
        tmp_trans_sheet.rename(columns=col_map, inplace=True)
        tmp_trans_sheet['户名'] = _name
        tmp_trans_sheet['账号'] = _account
        tmp_trans_sheet['币种'] = _currency
        tmp_trans_list_by_sheet.append(tmp_trans_sheet)
        tmp_line_num += len(tmp_trans_sheet)
    return tmp_line_num
Code Example #18
 def __init__(self, input_file):
     """
     Create a class to import and parse the excel spreadsheet that is used
     as an input file for V/UQ-predictivity.
     """
     self.file_name = input_file
     # Import the excel file:
     self.xlfile = ExcelFile(self.file_name)  # to retrieve & work w/ input
Code Example #19
def dump_xlsx2dict(xlsx_file):
    xls = ExcelFile(xlsx_file)
    df = xls.parse(xls.sheet_names[0])
    records = df.to_dict()  # avoid shadowing the built-in 'dict'
    dict2list = [{key: value[i] for key, value in records.items()}
                 for i in range(len(records['01_PatientName']))]

    return dict2list
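pandas can build the same row-wise list directly with the 'records' orientation, which avoids both the column-keyed intermediate dict and the hard-coded '01_PatientName' length lookup; an equivalent sketch:

# Equivalent sketch using to_dict(orient='records'): one dict per row.
from pandas import ExcelFile

def dump_xlsx2records(xlsx_file):
    xls = ExcelFile(xlsx_file)
    df = xls.parse(xls.sheet_names[0])
    return df.to_dict(orient='records')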
Code Example #20
def build_actualisation_group_names_h5():
    h5_name = "../actualisation_groups.h5"
    store = HDFStore(h5_name)
    xls = ExcelFile('actualisation_groups.xls')
    df = xls.parse('defs', na_values=['NA'])
    store['names'] = df
    print df.to_string()
    store.close()
Code Example #21
def build_actualisation_group_names_h5():
    h5_name = "../actualisation_groups.h5"
    store = HDFStore(h5_name)
    xls = ExcelFile('actualisation_groups.xls')
    df = xls.parse('defs', na_values=['NA'])
    store['names'] = df
    print df.to_string()
    store.close()
Code Example #22
def xls_to_dict(filepath):
    try:
        xls = ExcelFile(filepath)
    except IOError:
        print 'File not found: %s' % filepath
        return {}
    df = xls.parse(xls.sheet_names[0])
    d = df.to_dict(orient='records')
    return d
Code Example #23
def get_loyer_inflator(year):

    xls = ExcelFile('../countries/france/data/sources/loyers.xlsx')
    df = xls.parse('data', na_values=['NA'])
    irl_2006 = df[(df['year'] == 2006) & (df['quarter'] == 1)]['irl']
    #    print irl_2006
    irl = df[(df['year'] == year) & (df['quarter'] == 1)]['irl']
    #    print irl
    return float(irl.values / irl_2006.values)
Code Example #24
def start(file_name, download_name="example.xlsx"):

    with open(file_name, "rb") as f:
        text = f.read()

    excel_data = ExcelFile(io.BytesIO(text), engine='openpyxl')
    test_frame = excel_data.parse(excel_data.sheet_names[0])

    return create_download_link_excel(test_frame, download_name)
Code Example #25
def get_loyer_inflator(year):

    xls = ExcelFile('../countries/france/data/sources/loyers.xlsx')
    df = xls.parse('data', na_values=['NA'])
    irl_2006 = df[(df['year'] == 2006) & (df['quarter'] == 1)]['irl']
#    print irl_2006
    irl = df[(df['year'] == year) & (df['quarter'] == 1)]['irl']
#    print irl
    return float(irl.values / irl_2006.values)
Code Example #26
File: cherry.py Project: mcvmcv/cherry
    def openDialog(self):
        '''Opens a saved .xls file.'''
        title = 'Open a saved project file...'
        fileName, f = QFileDialog.getOpenFileName(self, title, self.path)
        excelFile = ExcelFile(fileName)
        self.__clearAll()
        for sheet in excelFile.sheet_names:
            self.markers.append(Table(sheet, excelFile.parse(sheet)))
        for marker in self.markers:
            self.tabs.addTab(marker, marker.name)
        self.__updateView()
Code Example #27
    def build_lookup_table(file_id,
                           file_name,
                           file_data,
                           value_column,
                           label_column,
                           workflow_spec_id=None,
                           task_spec_id=None,
                           field_id=None):
        """ In some cases the lookup table can be very large.  This method will add all values to the database
         in a way that can be searched and returned via an api call - rather than sending the full set of
          options along with the form.  It will only open the file and process the options if something has
          changed.  """
        try:
            xlsx = ExcelFile(file_data, engine='openpyxl')
        # Pandas, or at least openpyxl, cannot read old xls files.
        # The error comes back as zipfile.BadZipFile because xlsx files are zipped xml files.
        except BadZipFile:
            raise ApiError(
                code='excel_error',
                message=f"Error opening excel file {file_name}. You may have an older "
                        f".xls spreadsheet. (file_model_id: {file_id} workflow_spec_id: "
                        f"{workflow_spec_id}, task_spec_id: {task_spec_id}, and field_id: {field_id})")
        df = xlsx.parse(xlsx.sheet_names[0])  # currently we only look at the first sheet
        df = df.convert_dtypes()
        df = df.loc[:, ~df.columns.str.contains('^Unnamed')]  # drop unnamed columns
        df = pd.DataFrame(df).dropna(how='all')  # drop null rows
        df = pd.DataFrame(df).replace({NA: ''})

        if value_column not in df:
            raise ApiError(
                "invalid_enum",
                "The file %s does not contain a column named %s" %
                (file_name, value_column))
        if label_column not in df:
            raise ApiError(
                "invalid_enum",
                "The file %s does not contain a column named %s" %
                (file_name, label_column))

        lookup_model = LookupFileModel(workflow_spec_id=workflow_spec_id,
                                       field_id=field_id,
                                       task_spec_id=task_spec_id,
                                       file_model_id=file_id,
                                       is_ldap=False)

        db.session.add(lookup_model)
        for index, row in df.iterrows():
            lookup_data = LookupDataModel(lookup_file_model=lookup_model,
                                          value=row[value_column],
                                          label=row[label_column],
                                          data=row.to_dict(OrderedDict))
            db.session.add(lookup_data)
        db.session.commit()
        return lookup_model
Code Example #28
def loadfields():
    '''
    Get the variable names in the chosen excel sheet
    '''

    filename = ents[1][1].get()
    f = path.basename(filename)
    status.set("Status: loading data and column names of %s" %
               f.encode().decode())
    adds = ExcelFile(filename)
    sheet = sheet_combo.get()
    #   if first row is not entered, assume 1 and set the form to 1.
    if frow.get() == "":
        frow.insert(0, 1)
        first_row = 1
    else:
        first_row = int(frow.get())
    print("%s and %s onwards chosen." % (sheet, first_row))
    df = adds.parse(sheet, skiprows=first_row - 1)
    #print(df.columns.values)
    print("There are %s observations on this file." % len(df.index))
    # stray list of form labels, unused below:
    # ['Business Name:', 'Street Number:', 'Street Name:', 'City/Borough:',
    #  'Zipcode:', 'Boro Code:']
    defaults = {
        0: 'trade',
        1: 'legal',
        2: 'originaladdress',
        3: 'streetnumber',
        4: 'streetname',
        5: 'Borough',
        6: 'pzip',
        7: 'boro',
        8: 'state',
        9: '',
        10: ''
    }
    for i in range(len(combs)):
        collist = list(df.columns.values)
        collist.append("")
        combs[i][1]['state'] = 'enabled'
        combs[i][2]['state'] = 'enabled'
        combs[i][1]['values'] = sorted(collist, key=keyfunction)
        combs[i][2]['values'] = sorted(collist, key=keyfunction)
        choose_default(i, 1, collist, defaults[i])
        choose_default(i, 2, collist, defaults[i])
    chk['state'] = 'enabled'
    b4['state'] = 'enabled'
    #    print(combs[0][0], df[combs[0][1].get()].head(10))
    status.set(
        "Status: Choose address fields, optionally edit output file, and press 'Geocode'"
    )
    global DFrame
    DFrame = df
    return df
Code Example #29
File: functions.py Project: gvenki/tools
def drop_duplicates(final, keep):
    '''Drops the duplicate rows, as some files have the same rows repeated'''
    xls_file = ExcelFile(final)
    df = xls_file.parse('Page 1')
    print("\nDropping duplicates")
    df4 = df.drop_duplicates(subset=['Number', 'Expert_Assigned', 'Opened', 'Definition', 'Value', 'Created'],
                             keep=keep)
    df5 = df4.sort_values(['Number', 'Created', 'Definition', 'Expert_Assigned'], ascending=[True, True, False, True])
    df5.to_excel(final, sheet_name='Page 1', index=False)
    return
Code Example #30
File: parser.py Project: thesgc/cbh_core_ws
def get_custom_field_config(filename, sheetname):
    '''Early example of the import of a custom field config based upon a list of field names'''
    xls = ExcelFile(filename)
    data = xls.parse(sheetname, index_col=None, na_values=[''])
    data.columns = ["name", "required", "description"]
    data["required"] = data["required"].apply(is_true)

    data = data.fillna('')
    mydata = [{key: unicode(value) for key, value in point.items()} for point in data.T.to_dict().values()]
    return mydata
Code Example #31
def read_excel(filename):
    '''
    Read the Excel file at the given path.

    :param filename: path to an Excel file
    :return: pandas dataframe
    '''
    xl = ExcelFile(filename)
    df = xl.parse(xl.sheet_names[0])
    return df
Code Example #32
File: utils.py Project: wgor/ComOpt_Loan
def data_import(file) -> Dict[str, DataFrame]:
    xl = ExcelFile(file)
    output = dict()
    for sheet in xl.sheet_names:
        if "Flags" in sheet or "EMS" in sheet:
            output[sheet] = xl.parse(sheet_name=sheet, index_col="Parameter")
        else:
            try:
                output[sheet] = xl.parse(sheet_name=sheet, index_col="time")
            except ValueError:
                output[sheet] = xl.parse(sheet_name=sheet)
    return output
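A usage sketch for data_import(); the workbook name is hypothetical, and the sheet layout ("Parameter"-indexed Flags/EMS sheets plus "time"-indexed series sheets) follows the branching above:

# Hypothetical workbook: 'scenario.xlsx' with Flags/EMS sheets indexed by
# "Parameter" and the remaining sheets indexed by "time".
sheets = data_import('scenario.xlsx')
for name, frame in sheets.items():
    print(name, frame.shape)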
Code Example #33
File: parser.py Project: tsufz/chembiohub_ws
def get_custom_field_config(filename, sheetname):
    '''Early example of the import of a custom field config based upon a list of field names'''
    xls = ExcelFile(filename)
    data = xls.parse(sheetname, index_col=None, na_values=[''])
    data.columns = ["name", "required", "description"]
    data["required"] = data["required"].apply(is_true)

    data = data.fillna('')
    mydata = [{key: unicode(value) for key, value in point.items()}
              for point in data.T.to_dict().values()]
    return mydata
Code Example #34
def uploadfile_page2(request):

    if request.method == 'POST':

        if request.POST.has_key("reestrproj") and request.FILES.has_key("file"):

            reestrproj = request.POST["reestrproj"]

            filename = request.FILES['file'].name
            filedata = request.FILES['file'].read()

            file_extension = os.path.splitext(filename)[-1]

            if file_extension != ".xls" and file_extension != ".xlsx":

                return HttpResponse("""
                <html><head><script type="text/javascript">
                    window.top.ClearUploadP2();
                    alert("Формат файла не поддерживается!");
                </script></head></html>
                """)

            else:

                rp = reestr_proj.objects.get(pk=int(reestrproj, 10))

                excel_data = ExcelFile(StringIO.StringIO(filedata))
                df = excel_data.parse(excel_data.sheet_names[0], header=None)
                df=df.fillna("")
                ht = df.to_html(header=False,index=False, float_format=lambda x: '%10.2f' % x, classes="table table-bordered small").encode('utf-8')

                data = rp.data
                data["excel"] = ht
                rp.data = data
                rp.save()

                reestr_proj_comment.objects.create(
                    reestr_proj = rp,
                    user = request.user,
                    comment = u"Загружена таблица показателей",
                    log=True
                )

    return HttpResponse("""
    <html><head><script type="text/javascript">
        window.top.ClearUploadP2();
        window.top.GetTableExcel();
        window.top.GetListComments();
    </script></head></html>
    """)
Code Example #35
File: parser.py Project: thesgc/cbh_core_ws
def get_sheet(filename, sheetname):
    '''Extracts a list of dicts from a worksheet of an Excel file along with the
    column names, data types and maximum widths'''
    xls = ExcelFile(filename)
    data = xls.parse(sheetname, index_col=None, na_values=[''])
    data = data.fillna('')
    orig_cols = tuple(data.columns)
    replace = [get_key_from_field_name(column) for column in data.columns]
    data.columns = replace
    types = copy(data.dtypes)
    for col in replace:
        data[col] = data[col].values.astype(unicode)
    return (data.T.to_dict().values(), orig_cols, types, get_widths(data))
Code Example #36
def load_moc_and_engineers_info(excel_file: pd.ExcelFile):
    print("Loading MoC and engineers info")
    meta_info = excel_file.parse(INPUT_SHEET_NAME, header=4, usecols="A:C")
    meta_info.rename(columns={meta_info.columns[1]: "E-mail"}, inplace=True)

    moc_info_df = meta_info[meta_info['Name'].astype(str).str.startswith('MoC')]
    engineer_df = meta_info[meta_info['Name'].astype(str).str.startswith('Engineer')]
    engineer_begin_index = find_engineer_begin_index(
        excel_file.parse(INPUT_SHEET_NAME, usecols="A", header=None))
    print("Found engineer begin index at:", engineer_begin_index)
    return moc_info_df, engineer_df, engineer_begin_index
Code Example #37
File: parser.py Project: tsufz/chembiohub_ws
def get_sheet(filename, sheetname):
    '''Extracts a list of dicts from a worksheet of an Excel file along with the
    column names, data types and maximum widths'''
    xls = ExcelFile(filename)
    data = xls.parse(sheetname, index_col=None, na_values=[''])
    data = data.fillna('')
    orig_cols = tuple(data.columns)
    replace = [get_key_from_field_name(column) for column in data.columns]
    data.columns = replace
    types = copy(data.dtypes)
    for col in replace:
        data[col] = data[col].values.astype(unicode)
    return (data.T.to_dict().values(), orig_cols, types, get_widths(data))
Code Example #38
File: functions.py Project: gvenki/tools
def top_row(final):
    '''Rename the top row of the sheet with the required column names'''
    xls_file1 = ExcelFile(final)
    df = xls_file1.parse('Page 1')
    print("\nRenaming")
    # naming the indexes of the file
    df4 = df.rename(
        columns={"Unnamed: 0": 'Number', "Unnamed: 1": 'Priority', "Unnamed: 2": 'Opened', "Unnamed: 3": 'Definition',
                 "Unnamed: 4": 'Value', "Unnamed: 5": 'Expert_Assigned', "Unnamed: 6": 'Created',
                 "Unnamed: 7": 'End_time', "Unnamed: 8": 'Resolved', "Unnamed: 9": 'New_Resolved',
                 "Unnamed: 10": 'Closed', "Unnamed: 11": 'Main_file'})
    df4.to_excel(final, sheet_name='Page 1', index=False)
    return
Code Example #39
def build_actualisation_group_amounts_h5():
    h5_name = "../actualisation_groups.h5"
    store = HDFStore(h5_name)
    xls = ExcelFile('actualisation_groups.xls')
    df_a = xls.parse('amounts', na_values=['NA'])
    df_a = df_a.set_index(['case'], drop=True)
    df_b = xls.parse('benef', na_values=['NA'])
    df_c = xls.parse('corresp', na_values=['NA'])
    store['amounts'] = df_a
    store['benef']   = df_b
    store['corresp'] = df_c
    print df_a.to_string()
    print df_a.columns
    store.close()
Code Example #40
File: StudentService.py Project: VadokDev/MOSS-UTFSM
 def studentsExcelToList(self, fileName, startIndex):
     file = ExcelFile(fileName)
     studentsRawData = file.parse(
         file.sheet_names[0]).to_numpy()[startIndex:]
     for student in studentsRawData:
         yield [
             student[1],
             student[3],
             student[5],
             student[6],
             student[7],
             student[9],
             student[10],
         ]
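Because studentsExcelToList() is a generator, rows are produced lazily; a usage sketch with a hypothetical service instance, file name, and start index:

# Hypothetical driver: 'service' is an instance of the class above.
for row in service.studentsExcelToList('students.xlsx', startIndex=1):
    print(row)  # one list of seven selected cells per student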
Code Example #41
File: cherry.py Project: mcvmcv/cherry
	def createSamplesFromExcelKea(self):
		'''Opens a Kea Sample Batch spreadsheet and imports the samples.'''
		title									= 'Locate Kea sample batch spreadsheet...'
		fileName,f								= QFileDialog.getOpenFileName(self,title,self.path)
		excelFile								= ExcelFile(fileName)
		imported								= excelFile.parse('Data')
		imported								= imported[['PlantID','Sample ID','Plate No','Position on Plate(s)']]
		imported.columns						= ['Plant','Sample','Origin plate','Origin well']
		imported								= imported.dropna(how='all',subset=['Origin plate','Origin well'])
		imported['From plate']					= imported['Origin plate']
		imported['From well']					= imported['Origin well']
		imported['Plate']						= imported['Origin plate']
		imported['Well']						= imported['Origin well']
		self.markers[0].table					= self.markers[0].table.append(imported,ignore_index=True)
		self.__updateView()
Code Example #42
def build_actualisation_group_vars_h5():
    h5_name = "../actualisation_groups.h5"
    store = HDFStore(h5_name)
    xls = ExcelFile('actualisation_groups.xls')
    df = xls.parse('data', na_values=['NA'])
    store['vars'] = df
    print df.to_string()
    print store
    from numpy import unique
    coeff_list = sorted(unique(df['coeff'].dropna()))
    print coeff_list
    groups = {}
    for coeff in coeff_list:
        groups[coeff] = list(df[df['coeff'] == coeff]['var'])
    print groups
    store.close()
Code Example #43
File: controller.py Project: apetrin/OC16
 def load_auditory(self, file):
     """
     Повторяющиеся загружены не будут
     :param file:
     :return:
     """
     excel_file = ExcelFile(file)
     for name in excel_file.sheet_names:
         raw_frame = excel_file.parse(name, index_col=None, header=None)
         unresolved_dict = splitter(raw_frame, named=True)
         if "settings" in unresolved_dict.keys():
             tmp = Auditory(unresolved_dict, outer_name=name)
             if tmp.inner_name in self.auds.keys():
                 del tmp
             else:
                 self.auds[tmp.inner_name] = tmp
Code Example #44
File: views.py Project: avkozlov/statov
def list(request):
    # Handle file upload
    if request.method == 'POST':
        form = DocumentForm(request.POST, request.FILES)
        if form.is_valid():
            newdoc = Document(docfile = request.FILES['docfile'])
            newdoc.save()

            # Redirect to the document list after POST
            return HttpResponseRedirect(reverse('myproject.myapp.views.list'))

    else:
        form = DocumentForm()  # an empty, unbound form


    # Load documents for the list page
    documents = Document.objects.all()[4:]

    a = Document.objects.last()
    url = 'myproject' + a.docfile.url

    if os.path.isfile(url):
        vic = 'TRUE'

        v = ExcelFile(url).parse("Sheet1", parse_cols=[0, 18, 26, 25, 23])
        v.columns = ['Request','Product', 'Paid', 'PaidDate', 'Type']
        # parse the PaidDate column (dd.mm.yyyy) as datetime
        v.PaidDate = pd.to_datetime(v.PaidDate, format='%d.%m.%Y')
        df = pd.pivot_table(v, values='Paid', rows='PaidDate', cols=['Type', 'Product'], aggfunc=[np.sum, np.count_nonzero])
        df = df.resample('M', how='sum')
        df = df.fillna(value=0)

        # Render list page with the documents and the form
        return render_to_response(
            'myapp/list.html',
            {'documents': documents, 'form': form, 'df': df.to_html(classes="table-condensed"),'url':url, 'vic': vic},
            context_instance=RequestContext(request)
        )
    else:
        vic = 'False'
        return render_to_response(
            'myapp/list.html',
            {'documents': documents, 'form': form, 'url': url, 'vic': vic},
            context_instance=RequestContext(request)
        )
Code Example #45
File: age_structure.py Project: Iliato/openfisca-qt
def build_from_insee(directory=None, verbose=False):

    if directory is None:
        directory = os.path.dirname(__file__)

    fname = os.path.join(directory, H5_FILENAME)
    store = HDFStore(fname)
    xls = ExcelFile(os.path.join(model.DATA_SOURCES_DIR, "sd2010_t6_fm.xls"))

    df_age_final = None

    for year in range(2006,2010):
        sheet_name = str(year)

        df = xls.parse(sheet_name, header=0, index_col=0, skiprows=8, parse_cols=[1,2], na_values=['NA'])

        df.index.name = u"âge"
        df.rename(columns = {"Unnamed: 1" : year}, inplace = True)

        # Dealing with the "90 et plus" and "105 et plus" age rows
        df = df.reset_index()
        df = df.dropna(axis=0)
        df.set_value(106,u"âge", 105)
        df = df.set_index(u"âge")
        df.drop(df.index[90], axis=0, inplace=True)
        df.index.name = u"âge"
        df = df.reset_index()
        if verbose:
            print "year : " + str(year)
            print df.to_string()


        if df_age_final is None:
            df_age_final = df
        else:
            df_age_final = df_age_final.merge(df)

    if verbose:
        print df_age_final.to_string()
        print df_age_final.dtypes

    from numpy import dtype
    df_age_final[u"âge"] = df_age_final[u"âge"].astype(dtype("int64"))
    store.put("insee", df_age_final)
Code Example #46
File: controller.py Project: apetrin/OC16
 def __init__(self, file, from_pickle=False):
     if from_pickle:
         data = pickle.load(file)
         Checker.clean_global_init(data["checker_meta"])
         Seat.counters = data["seats_meta"]
         self.__dict__.update(data["controller"].__dict__)
         return
     self.email_handle = list()
     self.mode = {"people": "None"}
     self.last_change = None
     self.people = pd.DataFrame()
     self.auds = dict()
     self.inds = list()
     self.teams = list()
     self.seed = 1
     found_main_settings = False
     excel_file = ExcelFile(file)
     for name in excel_file.sheet_names:
         raw_frame = excel_file.parse(name, index_col=None, header=None)
         unresolved_dict = splitter(raw_frame, named=True)
         if "main_settings" in unresolved_dict.keys():
             if found_main_settings:
                 raise ControllerException("Две страницы с общими настройками!")
             found_main_settings = True
             Checker.raw_global_init(unresolved_dict)
             self.checker = Checker()
     if not found_main_settings:
         raise TypeError("Настройки не найдены, на странице с настройками нужен ключ main_settings")
     for name in excel_file.sheet_names:
         raw_frame = excel_file.parse(name, index_col=None, header=None)
         unresolved_dict = splitter(raw_frame, named=True)
         if "main_settings" not in unresolved_dict.keys():
             tmp = Auditory(unresolved_dict, outer_name=name)
             if tmp.inner_name in self.auds.keys():
                 del tmp
                 # "There are duplicate auditoriums"
                 raise TypeError("Есть одинаковые аудитории")
             else:
                 self.auds[tmp.inner_name] = tmp
     self._message_upd()
Code Example #47
from pandas import DataFrame, ExcelFile
import pandas as pd
import json


# Path to excel file
# Your path will be different, please modify the path below.
location = r'c:/users/meinzerc/Desktop/table.xlsx'

# Create ExcelFile object
xls = ExcelFile(location)

# Parse the excel file
table = xls.parse('Sheet1')
table.head()

location = r'c:/users/meinzerc/Desktop/base.xlsx'

# Create ExcelFile object
xls = ExcelFile(location)

# Parse the excel file
base = xls.parse('Sheet2')
base.head()

base.columns = ['File', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
basecut = base[['File', 'h']]
h = pd.DataFrame(basecut.h)
final = basecut.File.str.split('\\xa0+\s*\\xa0*')
abc = pd.DataFrame(final.tolist(), columns=['a', 'b', 'c', 'e', 'f'], index=final.index)
work = pd.merge(abc, h, how='left', left_index=True, right_index=True)
Code Example #48
# Python version 2.7.5
from pandas import ExcelFile
from markovchain import MarkovChain
from orderstatemapper import OrderStateMapper
from equalordermarkovmatrixcomparator import EqualOrderMarkovMatrixComparator
from scipy.stats import chisquare
from array import array

xlsx = ExcelFile('dane.xls')
data = xlsx.parse('strona', parse_cols=[1, 7], index_col=None, na_values=['NA'])

#order = 2
#map(lambda x: MarkovChain(x, order).stdout() , data.groupby('grupa').kto.tolist())

model_2 = MarkovChain(data['kto'], 2).markov_matrix
#print(model_2.markov_matrix.keys()[0][0][0:])

model_1 = MarkovChain(data['kto'], 1).markov_matrix

model_3 = MarkovChain(data['kto'], 3).markov_matrix


mapper = OrderStateMapper(model_1, 1, model_2, 2)


model_1_adjusted_to_2 = mapper.get_lower_order_matrix_adjusted_to_the_higher_one()

comparison_model = EqualOrderMarkovMatrixComparator(model_1_adjusted_to_2, model_2)
comparison = comparison_model.get_probabilities_for_transitions()
expected, observed = comparison_model.get_probablilities_expected_and_observed()
Code Example #49
def get_excel_data_frame(read_excel, skiprows=0, header=None, names=None):
    data = ExcelFile(read_excel)
    df = data.parse(data.sheet_names[0], header=header, index_col=None,
                    skiprows=skiprows, names=names)
    return df
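A usage sketch; the file name and column names are hypothetical, showing a header-less sheet read with caller-supplied names:

# Hypothetical input: skip two banner rows and name the columns ourselves.
df = get_excel_data_frame('data.xlsx', skiprows=2, header=None,
                          names=['id', 'value'])
print(df.head())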
Code Example #50
File: bank.py Project: astrikos/nl_banks_stats
 def import_data(self, transactions_file):
     xls = ExcelFile(transactions_file)
     self.data = xls.parse('Sheet0', index_col=3, na_values=['NA'])
Code Example #51
File: build.py Project: Pyke75/ga
def build_hdf_fr():

    # population
    DIR = '../../data_fr/proj_pop_insee'

    store = HDFStore(os.path.join(DIR, 'proj_pop.h5'))
    sex_dict = {0: 'populationH', 1: 'populationF'}

    for fil in os.listdir(DIR):
        if fil[:7] == 'projpop':
            filename = os.path.join(DIR, fil)
            xls = ExcelFile(filename)
#            sheets = xls.sheet_names
            pop = None
                    
            for sex, sheet in sex_dict.items():
                df = xls.parse(sheet, skiprows=[0, 1, 2, 3],
                               na_values=['NA'], index_col=0)
                df = df.reset_index()
                del df[df.columns[0]]
                for i in arange(109, 114):
                    df = df.drop([i])
                # Rename index
                df.index.names = ['age']
                df.columns = df.columns.astype('int32')
                df = df.unstack()
                df.index.names[0] = 'year'
                df = df.reset_index()
                df['sex'] = sex
                if pop is None:
                    pop = df
                else:
                    pop = pop.append(df)
               
            pop['pop'] = pop[0]
            del pop[0]
            
            s = pop[pop['age']>=100] 
            s = s.set_index(['age', 'sex', 'year'])
            s = s.sum(axis=0, level = ['sex', 'year'])
            
            pop = pop.set_index(['age', 'sex', 'year'])

            for t in s.index:
                pop.set_value( (100,) + t, 'pop', s.ix[t]['pop'])

            for a in range(101,109):
                pop = pop.drop(a, axis=0, level="age")
            print fil[:-4]
            store[fil[:-4]] = pop

    store.close()
    
    # profiles
    DIR = '../../data_fr'
    profile_file = 'profils.xls'
    store = HDFStore(os.path.join(DIR,'profiles.h5'))
    filename = os.path.join(DIR, profile_file)
    xls = ExcelFile(filename)
    sheets = xls.sheet_names
    profiles = None
    for sheet in sheets:
        df = xls.parse(sheet)
        df['age'] = df['age'].astype(int)
        df['sex'] = df['sex'].astype(int)
        df['year'] = 1996
        df = df.set_index(['age', 'sex', 'year'])

        if profiles is None:
            profiles = df
        else:
            profiles = profiles.merge(df, right_index=True, left_index=True)

    store['profiles'] = profiles

    store.close()
    print 'DONE'
Code Example #52
# the excerpt opens mid-list; 'sheet' is the list of sheet names used below
sheet = ['3uL_HP_0_A4_1','3uL_HP_0_A6_1','3uL_HP_0_A7_1',
       '3uL_HP_0_A8_1','3uL_HP_0_A9_1','4uL_HP_0_A11_1','4uL_HP_0_A12_1',
       '4uL_HP_0_B1_1','4uL_HP_0_B10_1','4uL_HP_0_B12_1','4uL_HP_0_B3_1',
       '4uL_HP_0_B4_1','4uL_HP_0_B6_1','4uL_HP_0_B7_1','4uL_HP_0_B9_1',
       '5uL_HP_0_C1_1','5uL_HP_0_C10_1','5uL_HP_0_C2_1','5uL_HP_0_C3_1',
       '5uL_HP_0_C4_1','5uL_HP_0_C5_1','5uL_HP_0_C6_1','5uL_HP_0_C7_1',
       '5uL_HP_0_C8_1','5uL_HP_0_C9_1','6uL_HP_0_C11_1','6uL_HP_0_C12_1',
       '6uL_HP_0_D1_1','6uL_HP_0_D2_1','6uL_HP_0_D3_1','6uL_HP_0_D4_1',
       '6uL_HP_0_D5_1','6uL_HP_0_D6_1','6uL_HP_0_D7_1','6uL_HP_0_D8_1',
       '7uL_HP_0_D9_1','7uL_HP_0_E1_1','7uL_HP_0_E3_1','7uL_HP_0_E4_1',
       '7uL_HP_0_E6_1','7uL_HP_0_E7_1','7uL_HP_0_E9_1','7uL_HP_0_F1_1',
       '7uL_HP_0_F2_1','7uL_HP_0_F3_1',
       'STD_0_B11_1','STD_0_B2_1','STD_0_B5_1','STD_0_B8_1','STD_0_E2_1',
       'STD_0_E5_1','STD_0_E8_1']

data = ExcelFile('D:\\Database\\Origianl Intensity.xls')
dict_merged = {}
dict_filtered = {}
dffinal = DataFrame(dict_merged)

for sheetname in sheet:
    dict_merged[sheetname] = data.parse(sheetname, skiprows=[0, 1])

drop_columns = ['time', 'SN', 'Quality Fac.', 'Res.', 'Area', 'Rel. Intens.', 'FWHM', 'Chi^2', 'Bk. Peak']
dict_dropped = {}

'''
for sheetname in sheet:
   dict_dropped[sheetname]=pd.DataFrame(dict_merged[sheetname].drop(drop_columns,axis=1),
                columns=['m/z','Intens.'])
Code Example #53
del df['Names']


# Import libraries
from pandas import ExcelFile, DataFrame, concat, date_range
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

df.to_excel('Lesson3.xlsx', index=False)

# Location of file
Location = r'C:\Users\hdrojas\.xy\startups\Lesson3.xlsx'

# Create ExcelFile object
xlsx = ExcelFile(Location)

# Parse a specific sheet
df = xlsx.parse('sheet1', index_col='StatusDate')
df.dtypes
#list index
df.index

#convert to upper
df.Names = df.Names.apply(lambda x: x.upper())
# Only grab where Status == 1
df = df[df['Status'] == 1]

# Replace values in the Names column: BOB -> Chet, then Chet -> John
df.Names[df.Names == 'BOB'] = 'Chet'
df.Names[df.Names == 'Chet'] = 'John'
Code Example #54
File: excel.py Project: JeffHeard/ga_resources
 def get_dataset(self, *args, **kwargs):
     xls = ExcelFile(self.resource.resource_file.path)
     if 'sheet' in kwargs:
         return xls.parse(kwargs['sheet'])
     return xls.parse("Sheet1")
Code Example #55
File: actions.py Project: strohne/Demunge
    def readDataFile(self,filename,filetype,tab=None):
        try:
            filepath, fileext = os.path.splitext(filename)
            filepath, filebasename = os.path.split(filename)

            if filetype not in filetypes:
                if fileext == '.xlsx':
                    filetype = filetype_xlsx
                if fileext == '.csv':
                    filetype = filetype_excelcsv

            if filetype == filetype_xlsx:
                xl = ExcelFile(filename)
                for sheet in xl.sheet_names:
                    try:
                        df = xl.parse(sheet)

                        if tab is None:
                            tab = self.mainWindow.TableTabs.addTable()
                            tab.setDataFrame(df)
                            tab.setName(filebasename+" "+sheet)
                            tab = None
                        else:
                            tab.setDataFrame(df)
                            tab.setName(filebasename+" "+sheet)
                            break

                    except IndexError:
                        pass

            elif filetype == filetype_excelcsv:
                df = read_csv(filename, sep=';',encoding='cp1252',dtype=str)
                if tab is None:
                    tab = self.mainWindow.TableTabs.addTable()
                tab.setDataFrame(df)
                tab.setName(filebasename)

            elif filetype == filetype_excelunicode:
                df = read_csv(filename, sep="\t",encoding='utf-16LE',dtype=str)
                if tab is None:
                    tab = self.mainWindow.TableTabs.addTable()
                tab.setDataFrame(df)
                tab.setName(filebasename)

            elif filetype == filetype_facepager:

# Automatically detect and remove BOM?
#                 infile = open(filename, 'rb')
#                 raw = infile.read(2)
#                 for enc,boms in \
#                         ('utf-8',(codecs.BOM_UTF8,)),\
#                         ('utf-16',(codecs.BOM_UTF16_LE,codecs.BOM_UTF16_BE)),\
#                         ('utf-32',(codecs.BOM_UTF32_LE,codecs.BOM_UTF32_BE)):
#                     if any(raw.startswith(bom) for bom in boms):
#                         encoding = enc
#
#                         break

                df = read_csv(filename, sep=";",encoding='utf-8-sig',dtype=str)

                firstcolumn = df.columns.values[0]
                firstcolumn = firstcolumn[1:]
                firstcolumn = firstcolumn[:-1]
                df.columns = [firstcolumn] + df.columns.values[1:].tolist()


                if tab is None:
                    tab = self.mainWindow.TableTabs.addTable()
                tab.setDataFrame(df)
                tab.setName(filebasename)

            if tab is not None:
                tab.show()

        except Exception as e:
            self.mainWindow.logmessage(e)
Code Example #56
def fill_pop_data():

    h5_insee = ExcelFile(pop_insee)

    for year in range(1996, 2007):
        print year

        # Extract the sheet we are interested in:
        xls = h5_insee.parse(str(year), index_col=0)
        print xls.columns
        age_max = max(xls["age"])
        print "    age_max = ", age_max

        # Split men and women, then create the sex column
        xls_men = xls.loc[:, ["men", "age", "year"]]
        xls_wom = xls.loc[:, ["women", "age", "year"]]

        xls_men["sex"] = 0
        xls_wom["sex"] = 1

        if year == 1996:
            print "initialisation", year
            xls_men.set_index(["age", "sex", "year"], inplace=True)
            xls_wom.set_index(["age", "sex", "year"], inplace=True)

            corrected_pop_men = xls_men
            corrected_pop_wom = xls_wom
            print corrected_pop_men.head().to_string()

        else:
            # Handle the change in INSEE data conventions:
            # from 2000 onwards people are recorded up to age 105 instead of 100

            if age_max > 100:
                print "    Age maximal > 100"
                print range(age_max.astype("int"), 99, -1)

                # Sum the people aged 100 and over
                tot_men = xls_men.men[xls_men.age >= 100].sum()
                tot_wom = xls_wom.women[xls_wom.age >= 100].sum()
                print tot_men, tot_wom

                # Replace the centenarians' value with the computed one,
                # then truncate the dataframes:
                xls_men.loc[xls_men.age == 100, "men"] = tot_men
                xls_wom.loc[xls_wom.age == 100, "women"] = tot_wom

                xls_men.set_index(["age", "sex", "year"], inplace=True)
                xls_wom.set_index(["age", "sex", "year"], inplace=True)

                xls_men = xls_men.loc[:(100, 0, year), :]
                xls_wom = xls_wom.loc[:(100, 1, year), :]

                # Combine with the rest:
                corrected_pop_men = concat([corrected_pop_men, xls_men])
                corrected_pop_wom = concat([corrected_pop_wom, xls_wom])

            if age_max == 100:
                # Set the indexes, then combine
                xls_men.set_index(["age", "sex", "year"], inplace=True)
                xls_wom.set_index(["age", "sex", "year"], inplace=True)

                corrected_pop_men = concat([corrected_pop_men, xls_men])
                corrected_pop_wom = concat([corrected_pop_wom, xls_wom])

                print corrected_pop_men.head().to_string()

            if age_max < 100:
                raise Exception("the maximum recorded age is below 100")

        print len(corrected_pop_men), "    length of corrected_pop"

    print "    fin des boucles"
    print corrected_pop_men.columns
    corrected_pop_men.columns = ["pop"]
    corrected_pop_wom.columns = ["pop"]

    print corrected_pop_men.head(10).to_string()

    corrected_pop = concat([corrected_pop_men, corrected_pop_wom])
    print corrected_pop.head().to_string()
    print len(corrected_pop)
    store_pop = HDFStore(os.path.join(SRC_PATH, "countries", country, "sources", "Carole_Bonnet", "pop_1996_2006.h5"))
    store_pop["population"] = corrected_pop
Code Example #57
def test():
    print "Entering the simulation of C. Bonnet"

    simulation = Simulation()
    population_scenario = "projpop0760_FECbasESPbasMIGbas"
    simulation.load_population(population_filename, population_scenario)

    # Adding missing population data between 1996 and 2007 :
    store_pop = HDFStore(os.path.join(SRC_PATH, "countries", country, "sources", "Carole_Bonnet", "pop_1996_2006.h5"))
    corrected_pop = store_pop["population"]
    print simulation.population.head().to_string()
    print corrected_pop.head().to_string()
    print "    longueurs des inputs"
    print "prévisions insee", len(simulation.population), "population corrigée", len(corrected_pop)

    simulation.population = concat([corrected_pop, simulation.population])
    print "    longueur après combinaison", len(simulation.population)

    # Loading profiles :
    simulation.load_profiles(profiles_filename)
    xls = ExcelFile(CBonnet_results)

    """
    Hypothesis set #1 : 
    actualization rate r = 3%
    growth rate g = 1%
    net_gov_wealth = -3217.7e+09 (unit : Franc Français (FRF) of 1996)
    non ventilated government spendings in 1996 : 1094e+09 FRF
    """

    # Setting parameters :
    year_length = 250
    simulation.year_length = year_length
    r = 0.03
    g = 0.01
    n = 0.00
    net_gov_wealth = -3217.7e09
    year_gov_spending = (1094) * 1e09

    #     avg_gov_spendings = 0
    #     # List w/ the economic affairs
    #     spending_list = [241861, 246856, 245483, 251110, 261752, 271019,
    #                      286330,    290499,    301556,    315994,    315979,    332317,
    #                      343392,    352239,    356353,    356858]
    #     count = 0
    #     for spent in spending_list:
    #         year_gov_spending = spent*1e+06*((1+g)/(1+r))**count*6.55957
    #         print year_gov_spending
    #         net_gov_spendings += year_gov_spending
    #         avg_gov_spendings += year_gov_spending
    #         count += 1

    #     avg_gov_spendings /= (count)
    #     print 'avg_gov_spendings = ', avg_gov_spendings

    # Loading simulation's parameters :
    simulation.set_population_projection(year_length=year_length, method="stable")
    simulation.set_tax_projection(method="per_capita", rate=g)
    simulation.set_growth_rate(g)
    simulation.set_discount_rate(r)
    simulation.set_population_growth_rate(n)
    simulation.create_cohorts()
    simulation.set_gov_wealth(net_gov_wealth)
    simulation.set_gov_spendings(year_gov_spending, default=True, compute=True)

    # Calculating net transfers:
    # net_transfers = tax paid to the state minus money received from the state
    taxes_list = ["tva", "tipp", "cot", "irpp", "impot", "property"]
    payments_list = ["chomage", "retraite", "revsoc", "maladie", "educ"]
    simulation.cohorts.compute_net_transfers(name="net_transfers", taxes_list=taxes_list, payments_list=payments_list)

    """
    Reproducing the table 2 : Comptes générationnels par âge et sexe (Compte central)
    """
    # Generating generationnal accounts :
    year = 1996
    simulation.create_present_values(typ="net_transfers")
    print "PER CAPITA PV"
    print simulation.percapita_pv.xs(0, level="age").head(10)
    print simulation.percapita_pv.xs((0, year), level=["sex", "year"]).head(10)

    # Calculating the Intertemporal Public Liability
    ipl = simulation.compute_ipl(typ="net_transfers")
    print "------------------------------------"
    print "IPL =", ipl
    print "share of the GDP : ", ipl / 8050.6e09 * 100, "%"
    print "------------------------------------"

    # Calculating the generational imbalance
    gen_imbalance = simulation.compute_gen_imbalance(typ="net_transfers")
    print "----------------------------------"
    print "[n_1/n_0=", gen_imbalance, "]"
    print "----------------------------------"

    # Creating age classes
    cohorts_age_class = simulation.create_age_class(typ="net_transfers", step=5)
    cohorts_age_class._types = [
        u"tva",
        u"tipp",
        u"cot",
        u"irpp",
        u"impot",
        u"property",
        u"chomage",
        u"retraite",
        u"revsoc",
        u"maladie",
        u"educ",
        u"net_transfers",
    ]
    age_class_pv_fe = cohorts_age_class.xs((1, year), level=["sex", "year"])
    age_class_pv_ma = cohorts_age_class.xs((0, year), level=["sex", "year"])

    print "AGE CLASS PV"
    print age_class_pv_fe.head()
    print age_class_pv_ma.head()

    age_class_pv = concat([age_class_pv_fe, age_class_pv_ma], axis=1)
    print age_class_pv
    age_class_pv.to_excel(str(xls_adress) + "\calibration.xlsx", "compte_generation")

    # Plotting
    age_class_pv = cohorts_age_class.xs(year, level="year").unstack(level="sex")
    age_class_pv = age_class_pv["net_transfers"]
    age_class_pv.columns = ["men", "women"]
    #     age_class_pv['total'] = age_class_pv_ma['net_transfers'] + age_class_pv_fe['net_transfers']
    #     age_class_pv['total'] *= 1.0/2.0
    age_class_theory = xls.parse("Feuil1", index_col=0)

    age_class_pv["men_CBonnet"] = age_class_theory["men_Cbonnet"]
    age_class_pv["women_CBonnet"] = age_class_theory["women_Cbonnet"]
    age_class_pv.plot(style="--")
    plt.legend()
    plt.axhline(linewidth=2, color="black")
    plt.show()