Code Example #1
def diag_aggregates():

    years = ['2006', '2007', '2008', '2009']

    df_final = None
    for yr in years:
        xls = ExcelFile(fname_all)
        df = xls.parse(yr)  # parse the sheet for this year

        cols = [u"Mesure",
                u"Dépense \n(millions d'€)",
                u"Bénéficiaires \n(milliers)",
                u"Dépenses \nréelles \n(millions d'€)",
                u"Bénéficiaires \nréels \n(milliers)",
                u"Diff. relative \nDépenses",
                u"Diff. relative \nBénéficiaires"]
        selected_cols = [u"Mesure", u"Diff. relative \nDépenses", u"Diff. relative \nBénéficiaires"]
        df = df[selected_cols]
        df['year'] = yr
        df['num'] = range(len(df.index))
        df = df.set_index(['num', u'Mesure', 'year'])
        if df_final is None:
            df_final = df
        else:

            df_final = df_final.append(df, ignore_index=False)

#    DataFrame.groupby()
    df_final = df_final.sortlevel(0)
    print str(fname_all)[:-5]+'_diag.xlsx'
    writer = ExcelWriter(str(fname_all)[:-5]+'_diag.xlsx')
    df_final.to_excel(writer, sheet_name="diagnostics", float_format="%.2f")
    writer.save()
Code Example #2
class EXCEL:
    def __init__(self, xls_filepath):
        self.filepath = xls_filepath
        self._data = None  # filled in by select_sheet() / merge_sheet()
        self.xls_reader = ExcelFile(xls_filepath)
        self.sheet_names = self.xls_reader.sheet_names
        if len(self.sheet_names) == 1:
            self.select_sheet(self.sheet_names[0])
        self.time = datetime.datetime.now()

    def add(self):
        pass

    @property
    def data(self):
        return self._data

    def select_sheet(self, sheet_name):
        self._data = self.xls_reader.parse(
            sheet_name)  #self._data = pd.read_excel(xls_filepath)

    def merge_sheet(self):
        sheets = []
        for sheet_name in self.sheet_names:
            sheet = self.xls_reader.parse(sheet_name)
            sheets.append(sheet)
        self._data = pd.concat(sheets)

    def save(self, xls_filepath, sheet_name='Sheet5'):
        self.xls_reader.close()
        self.xls_writer = ExcelWriter(xls_filepath)
        self._data.to_excel(self.xls_writer, sheet_name)
        self.xls_writer.save()
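A short usage sketch for the class above; the workbook and output names are hypothetical, and the imports mirror what the class itself relies on:

# Usage sketch for the EXCEL class above. 'report.xlsx' and 'merged.xlsx'
# are hypothetical file names.
import datetime
import pandas as pd
from pandas import ExcelFile, ExcelWriter

book = EXCEL('report.xlsx')
print(book.sheet_names)                     # sheets discovered on open
book.select_sheet(book.sheet_names[0])      # load a single sheet into .data
book.merge_sheet()                          # or concatenate every sheet
book.save('merged.xlsx', sheet_name='all')  # write .data back out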
Code Example #3
File: nordpool.py Project: espenfb/SmartTS_Lab
def read_excel(fname, header=None):
    """Read excel into dict.
    Args:
        fname: name of excel file
        header: the Finland files do not have a header
    Output:
        dictionary containing the data
    """
    xls = ExcelFile(fname)
    if header:
        parse_cols = [1]
    else:
        parse_cols = None

    df = xls.parse(xls.sheet_names[0], skiprows=1, parse_cols=parse_cols)

    # Fix keys: iterate over a copy of the keys since we mutate the dict
    for key in list(temp):
        new_key = key.replace(" - ", "_")
        if new_key != key:
            temp[new_key] = temp.pop(key)
    # Stupid hack for Finland
    if header:
        temp[header] = temp.pop(temp.keys()[0])

    return temp
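The header argument doubles as the column name to assign for the header-less Finland files; a usage sketch with hypothetical file names:

# Hypothetical file names. For the header-less Finland file, the 'header'
# value becomes the key of its single data column, per the hack above.
prices = read_excel('elspot_prices.xls')
finland = read_excel('fi_prices.xls', header='FI')
print(sorted(prices.keys()))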
Code Example #4
File: data_tools.py Project: PTB-M4D/GridSens
def network_UKGDS(filename,header=28):
	"""
	Load Excel file with UKGDS data format and build dict array of bus coordinates
	and graph structure suitable for plotting with the networkx module.
	"""
	from numpy import array,where
	from pandas import ExcelFile
	from networkx import Graph

	data = ExcelFile(filename)
	bus = data.parse("Buses",header=header)
	branch = data.parse("Branches",header=header)
	pos = {}
	for node in range(len(bus["BNU"])):
		pos.update({node:array([bus["BXC"][node],bus["BYC"][node]])})
	net = []
	for k in range(len(branch["CFB"])):
		von = where(bus["BNU"]==branch["CFB"][k])[0][0]
		zu  = where(bus["BNU"]==branch["CTB"][k])[0][0]
		net.append([von,zu])
	nodes = set([n1 for n1,n2 in net] + [n2 for n1,n2 in net])
	G = Graph()
	for node in nodes:
		G.add_node(node)
	for edge in net:
		G.add_edge(edge[0],edge[1])
	return G,pos
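The graph and coordinate dict returned above plug directly into networkx drawing; a minimal sketch, assuming a workbook in the UKGDS format (file name hypothetical):

# Sketch: draw the bus/branch network; 'ukgds.xls' is a hypothetical input.
import matplotlib.pyplot as plt
import networkx as nx

G, pos = network_UKGDS('ukgds.xls')
nx.draw(G, pos=pos, node_size=30)
plt.show()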
Code Example #5
File: xls_converter.py Project: suibh/mic-tac-toe
    def convert(self, file_bytes):
        """Accepts a bytes array and returns a json string """

        excel_file = None

        try:
            excel_file = ExcelFile(BytesIO(file_bytes))
        except Exception as e:
            log.error("Error reading in excel bytes, {}".format(e))

        if excel_file is None:
            return self._default_value()

        if self.sheet_reader.sheet_name not in excel_file.sheet_names:
            return self._default_value()

        try:
            df = excel_file.parse(self.sheet_reader.sheet_name)
            success, output = self.sheet_reader.read(df)

            if not success:
                return self._default_value()

            return True, json.dumps(output, sort_keys=True)
        except Exception as e:
            log.error("Error parsing file: {}".format(e))

        return self._default_value()
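A sketch of how convert() might be driven; the converter instance and file name are hypothetical, and sheet_reader is whatever reader the class was built with:

# Hypothetical driver: 'converter' is an instance of the class above and
# 'input.xlsx' a made-up path; convert() returns (True, json_str) on
# success and the class's default value otherwise.
with open('input.xlsx', 'rb') as fh:
    result = converter.convert(fh.read())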
Code Example #6
def uploadfile_store(request):

    if request.method == 'POST':

        try:
            filename = request.FILES['fileupload'].name
            filedata = request.FILES['fileupload'].read()

            file_extension = os.path.splitext(filename)[-1]

            if file_extension == ".xls" or file_extension == ".xlsx":
                excel_data = ExcelFile(StringIO.StringIO(filedata))
                df = excel_data.parse(excel_data.sheet_names[0], header=None, index_col=None, na_values="")
                df = df.fillna("")
                ht = df.to_html(header=True, index=True,
                                float_format=lambda x: '%10.2f' % x,
                                classes="table table-bordered table-striped draggable").encode('utf-8')

                return HttpResponse("""
                <html><head><script type="text/javascript">
                window.top.ClearUploadEisup();
                </script>
                <style>
                table {
                        border-collapse: collapse;
                        margin-left: 30px;
                }

                table, th, td {
                        border: 1px solid black;
                        font-family: Verdana, Arial, Helvetica, sans-serif;
                        font-size: 8pt;
                }
                </style>
                </head>%s</html>
                """ % ht)

            else:
                # "Формат файла не поддерживается!" = "File format not supported!"
                return HttpResponse("""
                <html><head><script type="text/javascript">
                    window.top.ClearUploadEisup();
                    alert("Формат файла не поддерживается!");
                </script></head></html>
                """)

        except Exception:
            return HttpResponse("""
            <html><head><script type="text/javascript">
            </script></head></html>
            """)
Code Example #7
File: frametime.py Project: fibn144/nipet
    def from_excel(self, excel_file, units):
        """Pulls timing info from excel file and stores in an array.
        Parameters
        ----------
            excel_file:
                the name of the file to import from. 
                e.g. file.xls
            units:
                the units the imported data is in
        """
        try:
            df = ExcelFile(excel_file).parse('Sheet1') #dataframe
            rec = df.to_records()

            # rec can be converted to a plain numpy array by casting every
            # field to the same dtype with rec.astype and then calling
            # .view(that dtype); supposedly faster than the method below

            dat_arr = np.array(rec.tolist()) #pirate

            #get rid of the 'index' column from pandas
            self.data = dat_arr[0:dat_arr.shape[0], 1:self.col_num + 1]
            self.units = units
        except IOError:
            print "Could not read excel file: %s" % excel_file
        try:
            self._validate_frames()
        except FrameError:
            raise DataError('Bad data', self.data, excel_file)
Code Example #8
def read_and_save(file_name, log_file):
    print(f"Reading {file_name }")
    xls = ExcelFile(file_name)
    data = xls.parse(xls.sheet_names[0])
    for row_index, row in data.iterrows():
        try:
            if 'OFFICE' in row.keys():
                branch = row['OFFICE']
            else:
                branch = row['BRANCH']
            if 'BANK NAME' in row.keys():
                bank_name = row['BANK NAME']
            else:
                bank_name = row['BANK']

            # checking if already there or not
            obj = BankDetail.objects.filter(ifsc_code=row['IFSC'])
            if obj:
                obj.update(branch_name=branch,
                           bank_name=bank_name,
                           branch_address=row['ADDRESS'])
            else:
                BankDetail(ifsc_code=row['IFSC'],
                           branch_name=branch,
                           bank_name=bank_name,
                           branch_address=row['ADDRESS']).save()

        except KeyError as e:
            log_file.write(file_name)
            print(e)
            print(
                f"Error while reading file {file_name.split(os.sep)[-1]}\nSkipping."
            )
            break
Code Example #9
File: core.py Project: zhiwentech/pyBankTransactions
def parse_trans_bonx(excel_file: pd.ExcelFile, tmp_trans_list_by_sheet) -> int:
    col_map = {
        '交易机构': '交易网点',
        '交易类型': '交易方式',
        '借贷标识': '借贷标志',
        '对方行名': '对方开户行',
        '对方名称': '对方户名',
    }
    tmp_line_num = 0
    for sheet in excel_file.sheet_names:
        tmp_acc_strs = excel_file.parse(sheet_name=sheet, header=None, nrows=4)
        if len(tmp_acc_strs) == 0:
            continue
        _name = tmp_acc_strs.iloc[1, 0].split(':')[1]
        _account = tmp_acc_strs.iloc[2, 0].split(':')[1]
        _card = tmp_acc_strs.iloc[3, 0].split(':')[1]
        tmp_trans_sheet = excel_file.parse(sheet_name=sheet,
                                           header=6,
                                           dtype=str)
        tmp_trans_sheet.columns = tmp_trans_sheet.columns.str.strip()
        tmp_trans_sheet.rename(columns=col_map, inplace=True)
        tmp_trans_sheet['户名'] = _name
        tmp_trans_sheet['账号'] = _account
        tmp_trans_sheet['卡号'] = _card
        tmp_trans_list_by_sheet.append(tmp_trans_sheet)
        tmp_line_num += len(tmp_trans_sheet)
    return tmp_line_num
Code Example #10
 def __init__(self, xls_filepath):
     self.filepath = xls_filepath
     self.xls_reader = ExcelFile(xls_filepath)
     self.sheet_names = self.xls_reader.sheet_names
     if len(self.sheet_names) == 1:
         self.select_sheet(self.sheet_names[0])
     self.time = datetime.datetime.now()
Code Example #11
def build_totals():
    h5_name = "../amounts.h5"
    store = HDFStore(h5_name)
    files = [
        'logement_tous_regime', 'openfisca_pfam_tous_regimes',
        'minima_sociaux_tous_regimes', 'IRPP_PPE', 'cotisations_RegimeGeneral'
    ]
    first = True
    for xlsfile in files:
        xls = ExcelFile(xlsfile + '.xlsx')
        df_a = xls.parse('amounts', na_values=['NA'])
        try:
            df_b = xls.parse('benef', na_values=['NA'])
        except Exception:
            df_b = DataFrame()

        if first:
            amounts_df = df_a
            benef_df = df_b
            first = False
        else:
            amounts_df = concat([amounts_df, df_a])
            benef_df = concat([benef_df, df_b])

    amounts_df, benef_df = amounts_df.set_index("var"), benef_df.set_index("var")
    print amounts_df.to_string()
    print benef_df.to_string()
    store['amounts'] = amounts_df
    store['benef'] = benef_df
    store.close()
Code Example #12
File: core.py Project: zhiwentech/pyBankTransactions
def parse_trans_pab(excel_file: pd.ExcelFile, tmp_trans_list_by_sheet) -> int:
    col_map = {
        '借方发生额': '交易金额',
        '交易对方户名': '对方户名',
        '交易对方账号': '对方账号',
        '交易对方行名称': '对方开户行',
    }
    tmp_line_num = 0
    for sheet in excel_file.sheet_names:
        tmp_acc_strs = excel_file.parse(sheet_name=sheet, header=None, nrows=5)
        tmp_acc_strs.dropna(how='all', axis=1, inplace=True)
        _name = tmp_acc_strs.iloc[1, 3]
        _account = tmp_acc_strs.iloc[1, 1]
        _card_num = tmp_acc_strs.iloc[2, 1]
        _currency = tmp_acc_strs.iloc[4, 3]
        tmp_trans_sheet = excel_file.parse(sheet_name=sheet,
                                           header=6,
                                           dtype=str,
                                           skipfooter=2)
        tmp_trans_sheet.rename(columns=col_map, inplace=True)
        tmp_trans_sheet['户名'] = _name
        tmp_trans_sheet['账号'] = _account
        tmp_trans_sheet['卡号'] = _card_num
        tmp_trans_sheet['币种'] = _currency
        tmp_trans_sheet['交易金额'] = tmp_trans_sheet['交易金额'].str.replace(',', '')
        tmp_trans_sheet['贷方发生额'] = tmp_trans_sheet['贷方发生额'].str.replace(
            ',', '')
        tmp_trans_list_by_sheet.append(tmp_trans_sheet)
        tmp_line_num += len(tmp_trans_sheet)
    return tmp_line_num
Code Example #13
    def build_lookup_table(data_model: FileDataModel, value_column,
                           label_column, workflow_spec_id, field_id):
        """ In some cases the lookup table can be very large.  This method will add all values to the database
         in a way that can be searched and returned via an api call - rather than sending the full set of
          options along with the form.  It will only open the file and process the options if something has
          changed.  """
        xls = ExcelFile(data_model.data)
        df = xls.parse(xls.sheet_names[0])  # currently we only look at the first sheet
        df = pd.DataFrame(df).replace({np.nan: None})
        if value_column not in df:
            raise ApiError(
                "invalid_enum",
                "The file %s does not contain a column named %s" %
                (data_model.file_model.name, value_column))
        if label_column not in df:
            raise ApiError(
                "invalid_enum",
                "The file %s does not contain a column named %s" %
                (data_model.file_model.name, label_column))

        lookup_model = LookupFileModel(workflow_spec_id=workflow_spec_id,
                                       field_id=field_id,
                                       file_data_model_id=data_model.id,
                                       is_ldap=False)

        db.session.add(lookup_model)
        for index, row in df.iterrows():
            lookup_data = LookupDataModel(lookup_file_model=lookup_model,
                                          value=row[value_column],
                                          label=row[label_column],
                                          data=row.to_dict(OrderedDict))
            db.session.add(lookup_data)
        db.session.commit()
        return lookup_model
Code Example #14
File: corpus.py Project: estnltk/pfe
def excel_to_corpus(excel_path, corpus_path):
    '''NB! Make sure to use .xls file extension for Excel files.'''
    corpus = PyCorpus(corpus_path)
    excel  = ExcelFile(excel_path)
    # as we do not know the number of sheets, we parse all of them
    # until we get an error
    idx = 0
    while True:
        try:
            df = excel.parse(str(idx))
            # recreate some information that was modified when exporting to xls
            new_df = dict()
            for col in df.columns:
                data = []
                for v in df[col]:
                    if type(v) == float and math.isnan(v):
                        data.append(None)
                    elif v == 0:
                        data.append(False)
                    elif v == 1:
                        data.append(True)
                    else:
                        data.append(v)
                new_df[col] = Series(data)
            corpus[str(idx)] = DataFrame(new_df)
        except xlrd.biffh.XLRDError:
            break
        idx += 1
    corpus.close()
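The loop above probes sheets named "0", "1", ... until xlrd raises; since ExcelFile already exposes sheet_names, the same traversal can also be written without the sentinel exception. A variant sketch (not the author's code):

# Variant sketch: iterate the known sheet names instead of parsing until
# xlrd.biffh.XLRDError signals the end.
from pandas import ExcelFile

def iter_sheets(excel_path):
    excel = ExcelFile(excel_path)
    for name in excel.sheet_names:
        yield name, excel.parse(name)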
Code Example #15
def build_totals():
    h5_name = "../amounts.h5"
    store = HDFStore(h5_name)

    files = ['logement_tous_regime', 'pfam_tous_regimes',
             'minima_sociaux_tous_regimes', 'IRPP_PPE', 'cotisations_TousRegimes' ]

    first = True
    for xlsfile in files:
        xls = ExcelFile(xlsfile + '.xlsx')
        print xlsfile + '.xlsx'  # show which workbook is being read
        df_a = xls.parse('amounts', na_values=['NA'])
        try:
            df_b = xls.parse('benef', na_values=['NA'])
        except Exception:
            df_b = DataFrame()

        if first:
            amounts_df = df_a
            benef_df = df_b
            first = False
        else:
            amounts_df = concat([amounts_df, df_a])
            benef_df = concat([benef_df, df_b])

    amounts_df, benef_df = amounts_df.set_index("var"), benef_df.set_index("var")
    print amounts_df.to_string()
    print benef_df.to_string()
    store['amounts'] = amounts_df
    store['benef'] = benef_df
    store.close()
Code Example #16
    def action_import_pricelists(self):
        for item in self:
            active_id = item._context['active_id']
            pricelists_obj = item.env[item._context['active_model']]
            product_obj = item.env['product.product']
            data = StringIO(item.binary_data.decode('base64'))
            xls = ExcelFile(data)
            data = xls.parse(xls.sheet_names[0])
            pricelists_dict = data.to_dict()

            for running in range(0, len(pricelists_dict['PID'])):
                pid = str(pricelists_dict['PID'][running]).zfill(7)
                price_inc_vat = pricelists_dict['Price (Inc. Vat)'][running]
                product_id = product_obj.search(
                    [('default_code', '=', pid)]).id

                if not product_id:
                    raise except_orm(_('PID does not exist: %r') % (pid, ))
                if not pid or not price_inc_vat:
                    raise except_orm(_('Some PID or Price have empty text.'))

                pricelists_obj.pricelists_line_ids.create({
                    'pricelists_id': active_id,
                    'product_id': product_id,
                    'price_inc_vat': price_inc_vat,
                })

        return {'type': 'ir.actions.act_window_close'}
Code Example #17
File: core.py Project: zhiwentech/pyBankTransactions
def parse_trans_psbc(excel_file: pd.ExcelFile, tmp_trans_list_by_sheet) -> int:
    col_map = {
        '交易渠道': '交易方式',
        '交易机构名称': '交易网点',
        '对方账号/卡号/汇票号': '对方账号',
        '对方开户机构': '对方开户行',
    }
    tmp_line_num = 0
    for sheet in excel_file.sheet_names:
        tmp_acc_strs = excel_file.parse(sheet_name=sheet, header=None, nrows=4)
        if len(tmp_acc_strs) == 0:
            continue
        _tmp_str = tmp_acc_strs.iloc[1, 0].split(':')
        _name = _tmp_str[2]
        _account = _tmp_str[1].split()[0]
        _currency = tmp_acc_strs.iloc[3, 0].split(':')[1].split()[0]
        tmp_trans_sheet = excel_file.parse(sheet_name=sheet,
                                           header=5,
                                           dtype=str,
                                           skipfooter=3)
        tmp_trans_sheet.columns = tmp_trans_sheet.columns.str.strip()
        tmp_trans_sheet.rename(columns=col_map, inplace=True)
        tmp_trans_sheet['户名'] = _name
        tmp_trans_sheet['账号'] = _account
        tmp_trans_sheet['币种'] = _currency
        tmp_trans_list_by_sheet.append(tmp_trans_sheet)
        tmp_line_num += len(tmp_trans_sheet)
    return tmp_line_num
Code Example #18
 def __init__(self, input_file):
     """
     Create a class to import and parse the excel spreadsheet that is used
     as an input file for V/UQ-predictivity.
     """
     self.file_name = input_file
     # Import the excel file:
     self.xlfile = ExcelFile(self.file_name)  # to retrieve & work w/ input
Code Example #19
def dump_xlsx2dict(xlsx_file):
    xls = ExcelFile(xlsx_file)
    df = xls.parse(xls.sheet_names[0])
    records = df.to_dict()  # avoid shadowing the built-in 'dict'
    dict2list = [{key: value[i] for key, value in records.items()}
                 for i in range(len(records['01_PatientName']))]

    return dict2list
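pandas can build the same row-wise list directly with the 'records' orientation, which avoids both the column-keyed intermediate dict and the hard-coded '01_PatientName' length lookup; an equivalent sketch:

# Equivalent sketch using to_dict(orient='records'): one dict per row.
from pandas import ExcelFile

def dump_xlsx2records(xlsx_file):
    xls = ExcelFile(xlsx_file)
    df = xls.parse(xls.sheet_names[0])
    return df.to_dict(orient='records')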
Code Example #20
def build_actualisation_group_names_h5():
    h5_name = "../actualisation_groups.h5"
    store = HDFStore(h5_name)
    xls = ExcelFile('actualisation_groups.xls')
    df = xls.parse('defs', na_values=['NA'])
    store['names'] = df
    print df.to_string()
    store.close()
Code Example #21
def build_actualisation_group_names_h5():
    h5_name = "../actualisation_groups.h5"
    store = HDFStore(h5_name)
    xls = ExcelFile('actualisation_groups.xls')
    df = xls.parse('defs', na_values=['NA'])
    store['names'] = df
    print df.to_string()
    store.close()
Code Example #22
def xls_to_dict(filepath):
    try:
        xls = ExcelFile(filepath)
    except IOError:
        print 'File not found: %s' % filepath
        return {}
    df = xls.parse(xls.sheet_names[0])
    d = df.to_dict(orient='records')
    return d
Code Example #23
def get_loyer_inflator(year):

    xls = ExcelFile('../countries/france/data/sources/loyers.xlsx')
    df = xls.parse('data', na_values=['NA'])
    irl_2006 = df[(df['year'] == 2006) & (df['quarter'] == 1)]['irl']
    #    print irl_2006
    irl = df[(df['year'] == year) & (df['quarter'] == 1)]['irl']
    #    print irl
    return float(irl.values / irl_2006.values)
Code Example #24
def start(file_name, download_name="example.xlsx"):

    with open(file_name, "rb") as f:
        text = f.read()

    excel_data = ExcelFile(io.BytesIO(text), engine='openpyxl')
    test_frame = excel_data.parse(excel_data.sheet_names[0])

    return create_download_link_excel(test_frame, download_name)
Code Example #25
def get_loyer_inflator(year):

    xls = ExcelFile('../countries/france/data/sources/loyers.xlsx')
    df = xls.parse('data', na_values=['NA'])
    irl_2006 = df[(df['year'] == 2006) & (df['quarter'] == 1)]['irl']
#    print irl_2006
    irl = df[(df['year'] == year) & (df['quarter'] == 1)]['irl']
#    print irl
    return float(irl.values / irl_2006.values)
Code Example #26
File: cherry.py Project: mcvmcv/cherry
    def openDialog(self):
        '''Opens a saved .xls file.'''
        title = 'Open a saved project file...'
        fileName, f = QFileDialog.getOpenFileName(self, title, self.path)
        excelFile = ExcelFile(fileName)
        self.__clearAll()
        for sheet in excelFile.sheet_names:
            self.markers.append(Table(sheet, excelFile.parse(sheet)))
        for marker in self.markers:
            self.tabs.addTab(marker, marker.name)
        self.__updateView()
Code Example #27
    def build_lookup_table(file_id,
                           file_name,
                           file_data,
                           value_column,
                           label_column,
                           workflow_spec_id=None,
                           task_spec_id=None,
                           field_id=None):
        """ In some cases the lookup table can be very large.  This method will add all values to the database
         in a way that can be searched and returned via an api call - rather than sending the full set of
          options along with the form.  It will only open the file and process the options if something has
          changed.  """
        try:
            xlsx = ExcelFile(file_data, engine='openpyxl')
        # Pandas, or at least openpyxl, cannot read old xls files.
        # The error comes back as zipfile.BadZipFile because xlsx files are zipped xml files.
        except BadZipFile:
            raise ApiError(
                code='excel_error',
                message=f"Error opening excel file {file_name}. You may have an older "
                        f".xls spreadsheet. (file_model_id: {file_id} workflow_spec_id: "
                        f"{workflow_spec_id}, task_spec_id: {task_spec_id}, and field_id: {field_id})")
        df = xlsx.parse(xlsx.sheet_names[0])  # currently we only look at the first sheet
        df = df.convert_dtypes()
        df = df.loc[:, ~df.columns.str.contains('^Unnamed')]  # drop unnamed columns
        df = pd.DataFrame(df).dropna(how='all')  # drop null rows
        df = pd.DataFrame(df).replace({NA: ''})

        if value_column not in df:
            raise ApiError(
                "invalid_enum",
                "The file %s does not contain a column named %s" %
                (file_name, value_column))
        if label_column not in df:
            raise ApiError(
                "invalid_enum",
                "The file %s does not contain a column named %s" %
                (file_name, label_column))

        lookup_model = LookupFileModel(workflow_spec_id=workflow_spec_id,
                                       field_id=field_id,
                                       task_spec_id=task_spec_id,
                                       file_model_id=file_id,
                                       is_ldap=False)

        db.session.add(lookup_model)
        for index, row in df.iterrows():
            lookup_data = LookupDataModel(lookup_file_model=lookup_model,
                                          value=row[value_column],
                                          label=row[label_column],
                                          data=row.to_dict(OrderedDict))
            db.session.add(lookup_data)
        db.session.commit()
        return lookup_model
Code Example #28
def loadfields():
    '''
    Get the variable names in the chosen excel sheet
    '''

    filename = ents[1][1].get()
    f = path.basename(filename)
    status.set("Status: loading data and column names of %s" %
               f.encode().decode())
    adds = ExcelFile(filename)
    sheet = sheet_combo.get()
    #   if first row is not entered, assume 1 and set the form to 1.
    if frow.get() == "":
        frow.insert(0, 1)
        first_row = 1
    else:
        first_row = int(frow.get())
    print("%s and %s onwards chosen." % (sheet, first_row))
    df = adds.parse(sheet, skiprows=first_row - 1)
    #print(df.columns.values)
    print("There are %s observations on this file." % len(df.index))
    # stray list of form labels, unused below:
    # ['Business Name:', 'Street Number:', 'Street Name:', 'City/Borough:',
    #  'Zipcode:', 'Boro Code:']
    defaults = {
        0: 'trade',
        1: 'legal',
        2: 'originaladdress',
        3: 'streetnumber',
        4: 'streetname',
        5: 'Borough',
        6: 'pzip',
        7: 'boro',
        8: 'state',
        9: '',
        10: ''
    }
    for i in range(len(combs)):
        collist = list(df.columns.values)
        collist.append("")
        combs[i][1]['state'] = 'enabled'
        combs[i][2]['state'] = 'enabled'
        combs[i][1]['values'] = sorted(collist, key=keyfunction)
        combs[i][2]['values'] = sorted(collist, key=keyfunction)
        choose_default(i, 1, collist, defaults[i])
        choose_default(i, 2, collist, defaults[i])
    chk['state'] = 'enabled'
    b4['state'] = 'enabled'
    #    print(combs[0][0], df[combs[0][1].get()].head(10))
    status.set(
        "Status: Choose address fields, optionally edit output file, and press 'Geocode'"
    )
    global DFrame
    DFrame = df
    return df
Code Example #29
File: functions.py Project: gvenki/tools
def drop_duplicates(final, keep):
    '''Drops the duplicate rows, as some files have the same rows repeated'''
    xls_file = ExcelFile(final)
    df = xls_file.parse('Page 1')
    print("\nDropping duplicates")
    df4 = df.drop_duplicates(subset=['Number', 'Expert_Assigned', 'Opened', 'Definition', 'Value', 'Created'],
                             keep=keep)
    df5 = df4.sort_values(['Number', 'Created', 'Definition', 'Expert_Assigned'], ascending=[True, True, False, True])
    df5.to_excel(final, sheet_name='Page 1', index=False)
    return
Code Example #30
File: parser.py Project: thesgc/cbh_core_ws
def get_custom_field_config(filename, sheetname):
    '''Early example of the import of a custom field config based upon a list of field names'''
    xls = ExcelFile(filename)
    data = xls.parse(sheetname, index_col=None, na_values=[''])
    data.columns = ["name", "required", "description"]
    data["required"] = data["required"].apply(is_true)

    data = data.fillna('')
    mydata = [{key: unicode(value) for key, value in point.items()} for point in data.T.to_dict().values()]
    return mydata
Code Example #31
def read_excel(filename):
    '''
    Read the Excel file at the given path.

    :param filename: path to an Excel file
    :return: pandas dataframe
    '''
    xl = ExcelFile(filename)
    df = xl.parse(xl.sheet_names[0])
    return df
Code Example #32
File: utils.py Project: wgor/ComOpt_Loan
def data_import(file) -> Dict[str, DataFrame]:
    xl = ExcelFile(file)
    output = dict()
    for sheet in xl.sheet_names:
        if "Flags" in sheet or "EMS" in sheet:
            output[sheet] = xl.parse(sheet_name=sheet, index_col="Parameter")
        else:
            try:
                output[sheet] = xl.parse(sheet_name=sheet, index_col="time")
            except ValueError:
                output[sheet] = xl.parse(sheet_name=sheet)
    return output
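A usage sketch for data_import(); the workbook name is hypothetical, and the sheet layout ("Parameter"-indexed Flags/EMS sheets plus "time"-indexed series sheets) follows the branching above:

# Hypothetical workbook: 'scenario.xlsx' with Flags/EMS sheets indexed by
# "Parameter" and the remaining sheets indexed by "time".
sheets = data_import('scenario.xlsx')
for name, frame in sheets.items():
    print(name, frame.shape)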
Code Example #33
File: parser.py Project: tsufz/chembiohub_ws
def get_custom_field_config(filename, sheetname):
    '''Early example of the import of a custom field config based upon a list of field names'''
    xls = ExcelFile(filename)
    data = xls.parse(sheetname, index_col=None, na_values=[''])
    data.columns = ["name", "required", "description"]
    data["required"] = data["required"].apply(is_true)

    data = data.fillna('')
    mydata = [{key: unicode(value) for key, value in point.items()}
              for point in data.T.to_dict().values()]
    return mydata
Code Example #34
def uploadfile_page2(request):

    if request.method == 'POST':

        if request.POST.has_key("reestrproj") and request.FILES.has_key("file"):

            reestrproj = request.POST["reestrproj"]

            filename = request.FILES['file'].name
            filedata = request.FILES['file'].read()

            file_extension = os.path.splitext(filename)[-1]

            if file_extension != ".xls" and file_extension != ".xlsx":

                return HttpResponse("""
                <html><head><script type="text/javascript">
                    window.top.ClearUploadP2();
                    alert("Формат файла не поддерживается!");
                </script></head></html>
                """)

            else:

                rp = reestr_proj.objects.get(pk=int(reestrproj, 10))

                excel_data = ExcelFile(StringIO.StringIO(filedata))
                df = excel_data.parse(excel_data.sheet_names[0], header=None)
                df=df.fillna("")
                ht = df.to_html(header=False,index=False, float_format=lambda x: '%10.2f' % x, classes="table table-bordered small").encode('utf-8')

                data = rp.data
                data["excel"] = ht
                rp.data = data
                rp.save()

                reestr_proj_comment.objects.create(
                    reestr_proj = rp,
                    user = request.user,
                    comment = u"Загружена таблица показателей",
                    log=True
                )

    return HttpResponse("""
    <html><head><script type="text/javascript">
        window.top.ClearUploadP2();
        window.top.GetTableExcel();
        window.top.GetListComments();
    </script></head></html>
    """)
Code Example #35
File: parser.py Project: thesgc/cbh_core_ws
def get_sheet(filename, sheetname):
    '''Extracts a list of dicts from a worksheet of an Excel file along with the
    column names, data types and maximum widths'''
    xls = ExcelFile(filename)
    data = xls.parse(sheetname, index_col=None, na_values=[''])
    data = data.fillna('')
    orig_cols = tuple(data.columns)
    replace = [get_key_from_field_name(column) for column in data.columns]
    data.columns = replace
    types = copy(data.dtypes)
    for col in replace:
        data[col] = data[col].values.astype(unicode)
    return (data.T.to_dict().values(), orig_cols, types, get_widths(data))
Code Example #36
def load_moc_and_engineers_info(excel_file: pd.ExcelFile):
    print("Loading MoC and engineers info")
    meta_info = excel_file.parse(INPUT_SHEET_NAME, header=4, usecols="A:C")
    meta_info.rename(columns={meta_info.columns[1]: "E-mail"}, inplace=True)

    moc_info_df = meta_info[meta_info['Name'].astype(str).str.startswith('MoC')]
    engineer_df = meta_info[meta_info['Name'].astype(str).str.startswith('Engineer')]
    engineer_begin_index = find_engineer_begin_index(
        excel_file.parse(INPUT_SHEET_NAME, usecols="A", header=None))
    print("Found engineer begin index at:", engineer_begin_index)
    return moc_info_df, engineer_df, engineer_begin_index
Code Example #37
File: parser.py Project: tsufz/chembiohub_ws
def get_sheet(filename, sheetname):
    '''Extracts a list of dicts from a worksheet of an Excel file along with the
    column names, data types and maximum widths'''
    xls = ExcelFile(filename)
    data = xls.parse(sheetname, index_col=None, na_values=[''])
    data = data.fillna('')
    orig_cols = tuple(data.columns)
    replace = [get_key_from_field_name(column) for column in data.columns]
    data.columns = replace
    types = copy(data.dtypes)
    for col in replace:
        data[col] = data[col].values.astype(unicode)
    return (data.T.to_dict().values(), orig_cols, types, get_widths(data))
Code Example #38
File: functions.py Project: gvenki/tools
def top_row(final):
    '''Rename the top row of the sheet with the required column names'''
    xls_file1 = ExcelFile(final)
    df = xls_file1.parse('Page 1')
    print("\nRenaming")
    # naming the indexes of the file
    df4 = df.rename(
        columns={"Unnamed: 0": 'Number', "Unnamed: 1": 'Priority', "Unnamed: 2": 'Opened', "Unnamed: 3": 'Definition',
                 "Unnamed: 4": 'Value', "Unnamed: 5": 'Expert_Assigned', "Unnamed: 6": 'Created',
                 "Unnamed: 7": 'End_time', "Unnamed: 8": 'Resolved', "Unnamed: 9": 'New_Resolved',
                 "Unnamed: 10": 'Closed', "Unnamed: 11": 'Main_file'})
    df4.to_excel(final, sheet_name='Page 1', index=False)
    return
Code Example #39
def build_actualisation_group_amounts_h5():
    h5_name = "../actualisation_groups.h5"
    store = HDFStore(h5_name)
    xls = ExcelFile('actualisation_groups.xls')
    df_a = xls.parse('amounts', na_values=['NA'])
    df_a = df_a.set_index(['case'], drop=True)
    df_b = xls.parse('benef', na_values=['NA'])
    df_c = xls.parse('corresp', na_values=['NA'])
    store['amounts'] = df_a
    store['benef']   = df_b
    store['corresp'] = df_c
    print df_a.to_string()
    print df_a.columns
    store.close()
Code Example #40
File: StudentService.py Project: VadokDev/MOSS-UTFSM
 def studentsExcelToList(self, fileName, startIndex):
     file = ExcelFile(fileName)
     studentsRawData = file.parse(
         file.sheet_names[0]).to_numpy()[startIndex:]
     for student in studentsRawData:
         yield [
             student[1],
             student[3],
             student[5],
             student[6],
             student[7],
             student[9],
             student[10],
         ]
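Because studentsExcelToList() is a generator, rows are produced lazily; a usage sketch with a hypothetical service instance, file name, and start index:

# Hypothetical driver: 'service' is an instance of the class above.
for row in service.studentsExcelToList('students.xlsx', startIndex=1):
    print(row)  # one list of seven selected cells per student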
Code Example #41
File: cherry.py Project: mcvmcv/cherry
	def createSamplesFromExcelKea(self):
		'''Opens a Kea Sample Batch spreadsheet and imports the samples.'''
		title									= 'Locate Kea sample batch spreadsheet...'
		fileName,f								= QFileDialog.getOpenFileName(self,title,self.path)
		excelFile								= ExcelFile(fileName)
		imported								= excelFile.parse('Data')
		imported								= imported[['PlantID','Sample ID','Plate No','Position on Plate(s)']]
		imported.columns						= ['Plant','Sample','Origin plate','Origin well']
		imported								= imported.dropna(how='all',subset=['Origin plate','Origin well'])
		imported['From plate']					= imported['Origin plate']
		imported['From well']					= imported['Origin well']
		imported['Plate']						= imported['Origin plate']
		imported['Well']						= imported['Origin well']
		self.markers[0].table					= self.markers[0].table.append(imported,ignore_index=True)
		self.__updateView()
Code Example #42
def build_actualisation_group_vars_h5():
    h5_name = "../actualisation_groups.h5"
    store = HDFStore(h5_name)
    xls = ExcelFile('actualisation_groups.xls')
    df = xls.parse('data', na_values=['NA'])
    store['vars'] = df
    print df.to_string()
    print store
    from numpy import unique
    coeff_list = sorted(unique(df['coeff'].dropna()))
    print coeff_list
    groups = {}
    for coeff in coeff_list:
        groups[coeff] = list(df[df['coeff'] == coeff]['var'])
    print groups
    store.close()
Code Example #43
File: controller.py Project: apetrin/OC16
 def load_auditory(self, file):
     """
     Повторяющиеся загружены не будут
     :param file:
     :return:
     """
     excel_file = ExcelFile(file)
     for name in excel_file.sheet_names:
         raw_frame = excel_file.parse(name, index_col=None, header=None)
         unresolved_dict = splitter(raw_frame, named=True)
         if "settings" in unresolved_dict.keys():
             tmp = Auditory(unresolved_dict, outer_name=name)
             if tmp.inner_name in self.auds.keys():
                 del tmp
             else:
                 self.auds[tmp.inner_name] = tmp
Code Example #44
File: views.py Project: avkozlov/statov
def list(request):
    # Handle file upload
    if request.method == 'POST':
        form = DocumentForm(request.POST, request.FILES)
        if form.is_valid():
            newdoc = Document(docfile = request.FILES['docfile'])
            newdoc.save()

            # Redirect to the document list after POST
            return HttpResponseRedirect(reverse('myproject.myapp.views.list'))

    else:
        form = DocumentForm()  # an empty, unbound form


    # Load documents for the list page
    documents = Document.objects.all()[4:]

    a = Document.objects.last()
    url = 'myproject' + a.docfile.url

    if os.path.isfile(url):
        vic = 'TRUE'

        v = ExcelFile(url).parse("Sheet1", parse_cols=[0, 18, 26, 25, 23])
        v.columns = ['Request','Product', 'Paid', 'PaidDate', 'Type']
        # parse the PaidDate column (dd.mm.yyyy) as datetime
        v.PaidDate = pd.to_datetime(v.PaidDate, format='%d.%m.%Y')
        df = pd.pivot_table(v, values='Paid', rows='PaidDate', cols=['Type', 'Product'], aggfunc=[np.sum, np.count_nonzero])
        df = df.resample('M', how='sum')
        df = df.fillna(value=0)

        # Render list page with the documents and the form
        return render_to_response(
            'myapp/list.html',
            {'documents': documents, 'form': form, 'df': df.to_html(classes="table-condensed"),'url':url, 'vic': vic},
            context_instance=RequestContext(request)
        )
    else:
        vic = 'False'
        return render_to_response(
            'myapp/list.html',
            {'documents': documents, 'form': form, 'url': url, 'vic': vic},
            context_instance=RequestContext(request)
        )
Code Example #45
File: age_structure.py Project: Iliato/openfisca-qt
def build_from_insee(directory=None, verbose=False):

    if directory is None:
        directory = os.path.dirname(__file__)

    fname = os.path.join(directory, H5_FILENAME)
    store = HDFStore(fname)
    xls = ExcelFile(os.path.join(model.DATA_SOURCES_DIR, "sd2010_t6_fm.xls"))

    df_age_final = None

    for year in range(2006,2010):
        sheet_name = str(year)

        df = xls.parse(sheet_name, header=0, index_col=0, skiprows=8, parse_cols=[1,2], na_values=['NA'])

        df.index.name = u"âge"
        df.rename(columns = {"Unnamed: 1" : year}, inplace = True)

        # Dealing with the "90 et plus" and "105 et plus" age rows
        df = df.reset_index()
        df = df.dropna(axis=0)
        df.set_value(106,u"âge", 105)
        df = df.set_index(u"âge")
        df.drop(df.index[90], axis=0, inplace=True)
        df.index.name = u"âge"
        df = df.reset_index()
        if verbose:
            print "year : " + str(year)
            print df.to_string()


        if df_age_final is None:
            df_age_final = df
        else:
            df_age_final = df_age_final.merge(df)

    if verbose:
        print df_age_final.to_string()
        print df_age_final.dtypes

    from numpy import dtype
    df_age_final[u"âge"] = df_age_final[u"âge"].astype(dtype("int64"))
    store.put("insee", df_age_final)
Code Example #46
File: controller.py Project: apetrin/OC16
 def __init__(self, file, from_pickle=False):
     if from_pickle:
         data = pickle.load(file)
         Checker.clean_global_init(data["checker_meta"])
         Seat.counters = data["seats_meta"]
         self.__dict__.update(data["controller"].__dict__)
         return
     self.email_handle = list()
     self.mode = {"people": "None"}
     self.last_change = None
     self.people = pd.DataFrame()
     self.auds = dict()
     self.inds = list()
     self.teams = list()
     self.seed = 1
     found_main_settings = False
     excel_file = ExcelFile(file)
     for name in excel_file.sheet_names:
         raw_frame = excel_file.parse(name, index_col=None, header=None)
         unresolved_dict = splitter(raw_frame, named=True)
         if "main_settings" in unresolved_dict.keys():
             if found_main_settings:
                 raise ControllerException("Две страницы с общими настройками!")
             found_main_settings = True
             Checker.raw_global_init(unresolved_dict)
             self.checker = Checker()
     if not found_main_settings:
         raise TypeError("Настройки не найдены, на странице с настройками нужен ключ main_settings")
     for name in excel_file.sheet_names:
         raw_frame = excel_file.parse(name, index_col=None, header=None)
         unresolved_dict = splitter(raw_frame, named=True)
         if "main_settings" not in unresolved_dict.keys():
             tmp = Auditory(unresolved_dict, outer_name=name)
             if tmp.inner_name in self.auds.keys():
                 del tmp
                 # "There are duplicate auditoriums"
                 raise TypeError("Есть одинаковые аудитории")
             else:
                 self.auds[tmp.inner_name] = tmp
     self._message_upd()
Code Example #47
from pandas import DataFrame, ExcelFile
import pandas as pd
import json


# Path to excel file
# Your path will be different, please modify the path below.
location = r'c:/users/meinzerc/Desktop/table.xlsx'

# Create ExcelFile object
xls = ExcelFile(location)

# Parse the excel file
table = xls.parse('Sheet1')
table.head()

location = r'c:/users/meinzerc/Desktop/base.xlsx'

# Create ExcelFile object
xls = ExcelFile(location)

# Parse the excel file
base = xls.parse('Sheet2')
base.head()

base.columns = ['File', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
basecut = base[['File', 'h']]
h = pd.DataFrame(basecut.h)
final = basecut.File.str.split('\\xa0+\s*\\xa0*')
abc = pd.DataFrame(final.tolist(), columns=['a', 'b', 'c', 'e', 'f'], index=final.index)
work = pd.merge(abc, h, how='left', left_index=True, right_index=True)
Code Example #48
# Python version 2.7.5
from pandas import ExcelFile
from markovchain import MarkovChain
from orderstatemapper import OrderStateMapper
from equalordermarkovmatrixcomparator import EqualOrderMarkovMatrixComparator
from scipy.stats import chisquare
from array import array

xlsx = ExcelFile('dane.xls')
data = xlsx.parse('strona', parse_cols=[1, 7], index_col=None, na_values=['NA'])

#order = 2
#map(lambda x: MarkovChain(x, order).stdout() , data.groupby('grupa').kto.tolist())

model_2 = MarkovChain(data['kto'], 2).markov_matrix
#print(model_2.markov_matrix.keys()[0][0][0:])

model_1 = MarkovChain(data['kto'], 1).markov_matrix

model_3 = MarkovChain(data['kto'], 3).markov_matrix


mapper = OrderStateMapper(model_1, 1, model_2, 2)


model_1_adjusted_to_2 = mapper.get_lower_order_matrix_adjusted_to_the_higher_one()

comparison_model = EqualOrderMarkovMatrixComparator(model_1_adjusted_to_2, model_2)
comparison = comparison_model.get_probabilities_for_transitions()
expected, observed = comparison_model.get_probablilities_expected_and_observed()
Code Example #49
def get_excel_data_frame(read_excel, skiprows=0, header=None, names=None):
    data = ExcelFile(read_excel)
    df = data.parse(data.sheet_names[0], header=header, index_col=None,
                    skiprows=skiprows, names=names)
    return df
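A usage sketch; the file name and column names are hypothetical, showing a header-less sheet read with caller-supplied names:

# Hypothetical input: skip two banner rows and name the columns ourselves.
df = get_excel_data_frame('data.xlsx', skiprows=2, header=None,
                          names=['id', 'value'])
print(df.head())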
Code Example #50
File: bank.py Project: astrikos/nl_banks_stats
 def import_data(self, transactions_file):
     xls = ExcelFile(transactions_file)
     self.data = xls.parse('Sheet0', index_col=3, na_values=['NA'])
Code Example #51
File: build.py Project: Pyke75/ga
def build_hdf_fr():

    # population
    DIR = '../../data_fr/proj_pop_insee'

    store = HDFStore(os.path.join(DIR, 'proj_pop.h5'))
    sex_dict = {0: 'populationH', 1: 'populationF'}

    for fil in os.listdir(DIR):
        if fil[:7] == 'projpop':
            filename = os.path.join(DIR, fil)
            xls = ExcelFile(filename)
#            sheets = xls.sheet_names
            pop = None
                    
            for sex, sheet in sex_dict.items():
                df = xls.parse(sheet, skiprows=[0, 1, 2, 3],
                               na_values=['NA'], index_col=0)
                df = df.reset_index()
                del df[df.columns[0]]
                for i in arange(109, 114):
                    df = df.drop([i])
                # Rename index
                df.index.names = ['age']
                df.columns = df.columns.astype('int32')
                df = df.unstack()
                df.index.names[0] = 'year'
                df = df.reset_index()
                df['sex'] = sex
                if pop is None:
                    pop = df
                else:
                    pop = pop.append(df)
               
            pop['pop'] = pop[0]
            del pop[0]
            
            s = pop[pop['age']>=100] 
            s = s.set_index(['age', 'sex', 'year'])
            s = s.sum(axis=0, level = ['sex', 'year'])
            
            pop = pop.set_index(['age', 'sex', 'year'])

            for t in s.index:
                pop.set_value( (100,) + t, 'pop', s.ix[t]['pop'])

            for a in range(101,109):
                pop = pop.drop(a, axis=0, level="age")
            print fil[:-4]
            store[fil[:-4]] = pop

    store.close()
    
    # profiles
    DIR = '../../data_fr'
    profile_file = 'profils.xls'
    store = HDFStore(os.path.join(DIR,'profiles.h5'))
    filename = os.path.join(DIR, profile_file)
    xls = ExcelFile(filename)
    sheets = xls.sheet_names
    profiles = None
    for sheet in sheets:
        df = xls.parse(sheet)
        df['age'] = df['age'].astype(int)
        df['sex'] = df['sex'].astype(int)
        df['year'] = 1996
        df = df.set_index(['age', 'sex', 'year'])

        if profiles is None:
            profiles = df
        else:
            profiles = profiles.merge(df, right_index=True, left_index=True)

    store['profiles'] = profiles

    store.close()
    print 'DONE'
Code Example #52
# the excerpt opens mid-list; 'sheet' is the list of sheet names used below
sheet = ['3uL_HP_0_A4_1','3uL_HP_0_A6_1','3uL_HP_0_A7_1',
       '3uL_HP_0_A8_1','3uL_HP_0_A9_1','4uL_HP_0_A11_1','4uL_HP_0_A12_1',
       '4uL_HP_0_B1_1','4uL_HP_0_B10_1','4uL_HP_0_B12_1','4uL_HP_0_B3_1',
       '4uL_HP_0_B4_1','4uL_HP_0_B6_1','4uL_HP_0_B7_1','4uL_HP_0_B9_1',
       '5uL_HP_0_C1_1','5uL_HP_0_C10_1','5uL_HP_0_C2_1','5uL_HP_0_C3_1',
       '5uL_HP_0_C4_1','5uL_HP_0_C5_1','5uL_HP_0_C6_1','5uL_HP_0_C7_1',
       '5uL_HP_0_C8_1','5uL_HP_0_C9_1','6uL_HP_0_C11_1','6uL_HP_0_C12_1',
       '6uL_HP_0_D1_1','6uL_HP_0_D2_1','6uL_HP_0_D3_1','6uL_HP_0_D4_1',
       '6uL_HP_0_D5_1','6uL_HP_0_D6_1','6uL_HP_0_D7_1','6uL_HP_0_D8_1',
       '7uL_HP_0_D9_1','7uL_HP_0_E1_1','7uL_HP_0_E3_1','7uL_HP_0_E4_1',
       '7uL_HP_0_E6_1','7uL_HP_0_E7_1','7uL_HP_0_E9_1','7uL_HP_0_F1_1',
       '7uL_HP_0_F2_1','7uL_HP_0_F3_1',
       'STD_0_B11_1','STD_0_B2_1','STD_0_B5_1','STD_0_B8_1','STD_0_E2_1',
       'STD_0_E5_1','STD_0_E8_1']

data = ExcelFile('D:\\Database\\Origianl Intensity.xls')
dict_merged = {}
dict_filtered = {}
dffinal = DataFrame(dict_merged)

for sheetname in sheet:
    dict_merged[sheetname] = data.parse(sheetname, skiprows=[0, 1])

drop_columns = ['time', 'SN', 'Quality Fac.', 'Res.', 'Area', 'Rel. Intens.', 'FWHM', 'Chi^2', 'Bk. Peak']
dict_dropped = {}

'''
for sheetname in sheet:
   dict_dropped[sheetname]=pd.DataFrame(dict_merged[sheetname].drop(drop_columns,axis=1),
                columns=['m/z','Intens.'])
Code Example #53
del df['Names']


# Import libraries
from pandas import ExcelFile, DataFrame, concat, date_range
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

df.to_excel('Lesson3.xlsx', index=False)

# Location of file
Location = r'C:\Users\hdrojas\.xy\startups\Lesson3.xlsx'

# Create ExcelFile object
xlsx = ExcelFile(Location)

# Parse a specific sheet
df = xlsx.parse('sheet1', index_col='StatusDate')
df.dtypes
#list index
df.index

#convert to upper
df.Names = df.Names.apply(lambda x: x.upper())
# Only grab where Status == 1
df = df[df['Status'] == 1]

# Replace values in the Names column: BOB -> Chet, then Chet -> John
df.Names[df.Names == 'BOB'] = 'Chet'
df.Names[df.Names == 'Chet'] = 'John'
Code Example #54
File: excel.py Project: JeffHeard/ga_resources
 def get_dataset(self, *args, **kwargs):
     xls = ExcelFile(self.resource.resource_file.path)
     if 'sheet' in kwargs:
         return xls.parse(kwargs['sheet'])
     return xls.parse("Sheet1")
Code Example #55
File: actions.py Project: strohne/Demunge
    def readDataFile(self,filename,filetype,tab=None):
        try:
            filepath, fileext = os.path.splitext(filename)
            filepath, filebasename = os.path.split(filename)

            if filetype not in filetypes:
                if fileext == '.xlsx':
                    filetype = filetype_xlsx
                if fileext == '.csv':
                    filetype = filetype_excelcsv

            if filetype == filetype_xlsx:
                xl = ExcelFile(filename)
                for sheet in xl.sheet_names:
                    try:
                        df = xl.parse(sheet)

                        if tab is None:
                            tab = self.mainWindow.TableTabs.addTable()
                            tab.setDataFrame(df)
                            tab.setName(filebasename+" "+sheet)
                            tab = None
                        else:
                            tab.setDataFrame(df)
                            tab.setName(filebasename+" "+sheet)
                            break

                    except IndexError:
                        pass

            elif filetype == filetype_excelcsv:
                df = read_csv(filename, sep=';',encoding='cp1252',dtype=str)
                if tab is None:
                    tab = self.mainWindow.TableTabs.addTable()
                tab.setDataFrame(df)
                tab.setName(filebasename)

            elif filetype == filetype_excelunicode:
                df = read_csv(filename, sep="\t",encoding='utf-16LE',dtype=str)
                if tab is None:
                    tab = self.mainWindow.TableTabs.addTable()
                tab.setDataFrame(df)
                tab.setName(filebasename)

            elif filetype == filetype_facepager:

# Automatically detect and remove BOM?
#                 infile = open(filename, 'rb')
#                 raw = infile.read(2)
#                 for enc,boms in \
#                         ('utf-8',(codecs.BOM_UTF8,)),\
#                         ('utf-16',(codecs.BOM_UTF16_LE,codecs.BOM_UTF16_BE)),\
#                         ('utf-32',(codecs.BOM_UTF32_LE,codecs.BOM_UTF32_BE)):
#                     if any(raw.startswith(bom) for bom in boms):
#                         encoding = enc
#
#                         break

                df = read_csv(filename, sep=";",encoding='utf-8-sig',dtype=str)

                firstcolumn = df.columns.values[0]
                firstcolumn = firstcolumn[1:]
                firstcolumn = firstcolumn[:-1]
                df.columns = [firstcolumn] + df.columns.values[1:].tolist()


                if tab is None:
                    tab = self.mainWindow.TableTabs.addTable()
                tab.setDataFrame(df)
                tab.setName(filebasename)

            if tab is not None:
                tab.show()

        except Exception as e:
            self.mainWindow.logmessage(e)
Code Example #56
def fill_pop_data():

    h5_insee = ExcelFile(pop_insee)

    for year in range(1996, 2007):
        print year

        # Extract the sheet we are interested in:
        xls = h5_insee.parse(str(year), index_col=0)
        print xls.columns
        age_max = max(xls["age"])
        print "    age_max = ", age_max

        # Split men and women, then create the sex column
        xls_men = xls.loc[:, ["men", "age", "year"]]
        xls_wom = xls.loc[:, ["women", "age", "year"]]

        xls_men["sex"] = 0
        xls_wom["sex"] = 1

        if year == 1996:
            print "initialisation", year
            xls_men.set_index(["age", "sex", "year"], inplace=True)
            xls_wom.set_index(["age", "sex", "year"], inplace=True)

            corrected_pop_men = xls_men
            corrected_pop_wom = xls_wom
            print corrected_pop_men.head().to_string()

        else:
            # Handle the change in INSEE data conventions:
            # from 2000 onwards people are recorded up to age 105 instead of 100

            if age_max > 100:
                print "    Age maximal > 100"
                print range(age_max.astype("int"), 99, -1)

                # Sum the people aged 100 and over
                tot_men = xls_men.men[xls_men.age >= 100].sum()
                tot_wom = xls_wom.women[xls_wom.age >= 100].sum()
                print tot_men, tot_wom

                # Replace the centenarians' value with the computed one,
                # then truncate the dataframes:
                xls_men.loc[xls_men.age == 100, "men"] = tot_men
                xls_wom.loc[xls_wom.age == 100, "women"] = tot_wom

                xls_men.set_index(["age", "sex", "year"], inplace=True)
                xls_wom.set_index(["age", "sex", "year"], inplace=True)

                xls_men = xls_men.loc[:(100, 0, year), :]
                xls_wom = xls_wom.loc[:(100, 1, year), :]

                # Combine with the rest:
                corrected_pop_men = concat([corrected_pop_men, xls_men])
                corrected_pop_wom = concat([corrected_pop_wom, xls_wom])

            if age_max == 100:
                # Set the indexes, then combine
                xls_men.set_index(["age", "sex", "year"], inplace=True)
                xls_wom.set_index(["age", "sex", "year"], inplace=True)

                corrected_pop_men = concat([corrected_pop_men, xls_men])
                corrected_pop_wom = concat([corrected_pop_wom, xls_wom])

                print corrected_pop_men.head().to_string()

            if age_max < 100:
                raise Exception("the maximum recorded age is below 100")

        print len(corrected_pop_men), "    length of corrected_pop"

    print "    fin des boucles"
    print corrected_pop_men.columns
    corrected_pop_men.columns = ["pop"]
    corrected_pop_wom.columns = ["pop"]

    print corrected_pop_men.head(10).to_string()

    corrected_pop = concat([corrected_pop_men, corrected_pop_wom])
    print corrected_pop.head().to_string()
    print len(corrected_pop)
    store_pop = HDFStore(os.path.join(SRC_PATH, "countries", country, "sources", "Carole_Bonnet", "pop_1996_2006.h5"))
    store_pop["population"] = corrected_pop
Code Example #57
def test():
    print "Entering the simulation of C. Bonnet"

    simulation = Simulation()
    population_scenario = "projpop0760_FECbasESPbasMIGbas"
    simulation.load_population(population_filename, population_scenario)

    # Adding missing population data between 1996 and 2007 :
    store_pop = HDFStore(os.path.join(SRC_PATH, "countries", country, "sources", "Carole_Bonnet", "pop_1996_2006.h5"))
    corrected_pop = store_pop["population"]
    print simulation.population.head().to_string()
    print corrected_pop.head().to_string()
    print "    longueurs des inputs"
    print "prévisions insee", len(simulation.population), "population corrigée", len(corrected_pop)

    simulation.population = concat([corrected_pop, simulation.population])
    print "    longueur après combinaison", len(simulation.population)

    # Loading profiles :
    simulation.load_profiles(profiles_filename)
    xls = ExcelFile(CBonnet_results)

    """
    Hypothesis set #1 : 
    actualization rate r = 3%
    growth rate g = 1%
    net_gov_wealth = -3217.7e+09 (unit : Franc Français (FRF) of 1996)
    non ventilated government spendings in 1996 : 1094e+09 FRF
    """

    # Setting parameters :
    year_length = 250
    simulation.year_length = year_length
    r = 0.03
    g = 0.01
    n = 0.00
    net_gov_wealth = -3217.7e09
    year_gov_spending = (1094) * 1e09

    #     avg_gov_spendings = 0
    #     # List w/ the economic affairs
    #     spending_list = [241861, 246856, 245483, 251110, 261752, 271019,
    #                      286330,    290499,    301556,    315994,    315979,    332317,
    #                      343392,    352239,    356353,    356858]
    #     count = 0
    #     for spent in spending_list:
    #         year_gov_spending = spent*1e+06*((1+g)/(1+r))**count*6.55957
    #         print year_gov_spending
    #         net_gov_spendings += year_gov_spending
    #         avg_gov_spendings += year_gov_spending
    #         count += 1

    #     avg_gov_spendings /= (count)
    #     print 'avg_gov_spendings = ', avg_gov_spendings

    # Loading simulation's parameters :
    simulation.set_population_projection(year_length=year_length, method="stable")
    simulation.set_tax_projection(method="per_capita", rate=g)
    simulation.set_growth_rate(g)
    simulation.set_discount_rate(r)
    simulation.set_population_growth_rate(n)
    simulation.create_cohorts()
    simulation.set_gov_wealth(net_gov_wealth)
    simulation.set_gov_spendings(year_gov_spending, default=True, compute=True)

    # Calculating net transfers:
    # net_transfers = tax paid to the state minus money received from the state
    taxes_list = ["tva", "tipp", "cot", "irpp", "impot", "property"]
    payments_list = ["chomage", "retraite", "revsoc", "maladie", "educ"]
    simulation.cohorts.compute_net_transfers(name="net_transfers", taxes_list=taxes_list, payments_list=payments_list)

    """
    Reproducing the table 2 : Comptes générationnels par âge et sexe (Compte central)
    """
    # Generating generationnal accounts :
    year = 1996
    simulation.create_present_values(typ="net_transfers")
    print "PER CAPITA PV"
    print simulation.percapita_pv.xs(0, level="age").head(10)
    print simulation.percapita_pv.xs((0, year), level=["sex", "year"]).head(10)

    # Calculating the Intertemporal Public Liability
    ipl = simulation.compute_ipl(typ="net_transfers")
    print "------------------------------------"
    print "IPL =", ipl
    print "share of the GDP : ", ipl / 8050.6e09 * 100, "%"
    print "------------------------------------"

    # Calculating the generational imbalance
    gen_imbalance = simulation.compute_gen_imbalance(typ="net_transfers")
    print "----------------------------------"
    print "[n_1/n_0=", gen_imbalance, "]"
    print "----------------------------------"

    # Creating age classes
    cohorts_age_class = simulation.create_age_class(typ="net_transfers", step=5)
    cohorts_age_class._types = [
        u"tva",
        u"tipp",
        u"cot",
        u"irpp",
        u"impot",
        u"property",
        u"chomage",
        u"retraite",
        u"revsoc",
        u"maladie",
        u"educ",
        u"net_transfers",
    ]
    age_class_pv_fe = cohorts_age_class.xs((1, year), level=["sex", "year"])
    age_class_pv_ma = cohorts_age_class.xs((0, year), level=["sex", "year"])

    print "AGE CLASS PV"
    print age_class_pv_fe.head()
    print age_class_pv_ma.head()

    age_class_pv = concat([age_class_pv_fe, age_class_pv_ma], axis=1)
    print age_class_pv
    age_class_pv.to_excel(str(xls_adress) + "\calibration.xlsx", "compte_generation")

    # Plotting
    age_class_pv = cohorts_age_class.xs(year, level="year").unstack(level="sex")
    age_class_pv = age_class_pv["net_transfers"]
    age_class_pv.columns = ["men", "women"]
    #     age_class_pv['total'] = age_class_pv_ma['net_transfers'] + age_class_pv_fe['net_transfers']
    #     age_class_pv['total'] *= 1.0/2.0
    age_class_theory = xls.parse("Feuil1", index_col=0)

    age_class_pv["men_CBonnet"] = age_class_theory["men_Cbonnet"]
    age_class_pv["women_CBonnet"] = age_class_theory["women_Cbonnet"]
    age_class_pv.plot(style="--")
    plt.legend()
    plt.axhline(linewidth=2, color="black")
    plt.show()