def load_master_initial_merge():
    """
    GitHub Issue #3 should be fixed:
    Initial merge is Master SCORP .csv  + GEO .xlsx file.
    Logic should pull street type and
    address fields from Geo and overwrite master.

    """
    print("Loading SCORP Master...")
    scorp_master_file = r"State Comprehensive Outdoor Recreation Plan Inventory of Facilities\MasterSCORP_Base.xlsx"
    print("Loading SCORP GEO...")
    geo_master_file = r"State Comprehensive Outdoor Recreation Plan Inventory of Facilities\SCORP_FILTER_GEO.xlsx"
    # Set index_col so that the rows match up on OBJECTID.
    sm = pd.read_excel(scorp_master_file, index_col="OBJECTID")
    gm = pd.read_excel(geo_master_file, index_col="OBJECTID")
    # Take Street type, street, town from geo, where available.
    sm["Street_Type"] = gm["Street_Type"]
    sm["Street"] = gm["Street"]
    sm['Town'] = gm['Town']
    print("Updated Street_Type, Street, Town from GEO to Master.")
    # Export MasterSCORP_Updated.csv to be new master,
    # then return the dataframe to whoever called it.
    export_filename = r"State Comprehensive Outdoor Recreation Plan Inventory of Facilities\MasterSCORP_Updated.csv"
    print("Exporting to {}".format(export_filename))
    sm.to_csv(export_filename)
    return sm
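# A minimal alternative sketch (not from the original project): only overwrite the
# Master fields where GEO actually has a value, instead of also copying NaNs.
# Assumes both frames are indexed on OBJECTID, as in load_master_initial_merge().
def overwrite_where_available(sm, gm, cols=("Street_Type", "Street", "Town")):
    for col in cols:
        # combine_first keeps the GEO value where present and falls back to Master
        sm[col] = gm[col].combine_first(sm[col])
    return sm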
def magicitems(dict, roll, row):
  if dict[roll]['MI Numb'] != '0':
    if dict[roll]['MI Numb 2'] != '0':
      times2 = diceroller(dict[roll]['MI Numb 2'])
      y = 0
      items2dict = {}
      try:
        mdf2 = pd.read_excel('Items.xlsx', sheet_name = dict[roll]['Item 2'], index_col = 0, usecols = 'E:F')
        mitems2 = dictcreator(mdf2)
        while y < times2:
          rolls = random.randint(1,100)
          items2dict['var_' + str(y)] = tk.Label(root, text = mitems2[rolls]['Item']).grid(row = row + 1, column = 1, columnspan = 2)
          y += 1
          row += 1
      except SyntaxError:
        pass
    times = diceroller(dict[roll]['MI Numb'])
    x = 0
    itemsdict = {}
    try:
      mdf = pd.read_excel('Items.xlsx', sheet_name = dict[roll]['Item'], index_col = 0, usecols = 'E:F')
      mitems = dictcreator(mdf)
      while x < times:
        rolls = random.randint(1,100)
        itemsdict["var_" + str(x)] = tk.Label(root, text = mitems[rolls]['Item']).grid(row = row + 1, column = 1, columnspan = 2)
        x += 1
        row += 1
    except SyntaxError:
      pass
  art(dict, roll, row)
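# diceroller and dictcreator are helpers from the surrounding project and are not
# shown here. A hypothetical sketch of diceroller, assuming dice strings such as
# "1d4" or "2d6" in the 'MI Numb' columns:
import random

def diceroller(dice):
    # "2d6" -> roll a six-sided die twice and sum the results
    count, sides = (int(part) for part in dice.lower().split('d'))
    return sum(random.randint(1, sides) for _ in range(count))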
Example #3
def main():
    # Several datafiles, each with a long list of subjects

    # Directory path variable assignment, assumes script is in working directory!!!
    DATA = "data"
    MEASURE = "measure"
    EXCEL = "excel_files"

    # Mainly for testing purposes
    if len(sys.argv) > 1: 
        DATA = os.path.join(sys.argv[1], DATA)
        MEASURE = os.path.join(sys.argv[1], MEASURE)
        EXCEL = os.path.join(sys.argv[1], EXCEL)


    # Build a list of Subtest IDs parsed from the data file names.
    # Uses a list comprehension over the .txt files in DATA.
    SubTestIndex = [os.path.split(_file)[1].split('_')[0].split('Test')[1] for _file in glob(os.path.join(DATA,"*.txt"))]

    for sID in SubTestIndex:  # sID => subtest ID,  eg. Sub[03A]
        pXLXS = os.path.join(EXCEL, "Sub{0}_person_measure.xlsx".format(sID))
        pTXT = os.path.join(MEASURE, "Sub{0}_person_measure.txt".format(sID))

        if os.path.exists(pXLXS):
            person_measure = pd.read_excel(pXLXS, header=None, names=['Scores', 'NaN', 'SubID', '_SubID', '_NaN'])
            person_output = person_measure[['SubID', 'Scores']]
            person_output.to_csv(pTXT, sep='\t', index=False, header=False)

            iXLXS = os.path.join(EXCEL, "Sub{0}_item_measure.xlsx".format(sID))
            iTXT = os.path.join(MEASURE, "Sub{0}_item_measure.txt".format(sID))
            pd.read_excel(iXLXS, header=None).to_csv(iTXT, sep='\t', index=False, header=False)
Example #4
def grouping_parcels(group):

	# Parcel to census lookup
	df = pd.read_csv(r'R:\Brice\gis\parcels_urbansim_census.txt')

	# add low income geography tag
	low_inc = pd.read_excel(r'R:\Brice\gis\special-needs\ACS_15_5YR_Low-income.xlsx', sheetname='Map-income')
	minority = pd.read_excel(r'R:\Brice\gis\special-needs\ACS_15_5YR_Minority.xlsx', sheetname='Mapping')

	if group == 'low_income':
		# Threshold for % of households as total for determining low income or not
		income_threshold = 0.5

		# Define low_inc tracts as those with more HH below 200% median income than those above it
		low_inc['% low inc'] = low_inc['Below200']/low_inc['Total']

		# Create flag for whether low income or not
		low_inc.ix[low_inc['% low inc'] >= income_threshold,'low_inc_tract'] = 1
		low_inc.ix[low_inc['% low inc'] < income_threshold,'low_inc_tract'] = 0

		# Merge with parcel file
		newdf = pd.merge(df, low_inc[['GEOID10','low_inc_tract']], on='GEOID10', how='left')
		parcels_list = newdf[newdf['low_inc_tract'] == 1].parcelid.values

	elif group == 'minority':
		minority['% minority'] = minority['Minority']/minority['Total']
		minority_threshold = 0.5
		minority.ix[minority['% minority'] >= minority_threshold, 'minority_tract'] = 1
		minority.ix[minority['% minority'] < minority_threshold, 'minority_tract'] = 0

		# Merge with parcel file
		newdf = pd.merge(df, minority[['GEOID10','minority_tract']], on='GEOID10', how='left')
		parcels_list = newdf[newdf['minority_tract'] == 1].parcelid.values

	return parcels_list
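# DataFrame.ix was removed in pandas 1.0 and read_excel's 'sheetname' keyword is now
# 'sheet_name'. A rough sketch (not the original project's code) of the low-income
# branch under current pandas, assuming the same column names as above:
import pandas as pd

def low_income_parcels(df, low_inc, income_threshold=0.5):
    low_inc = low_inc.copy()
    # share of households below 200% of median income
    low_inc['% low inc'] = low_inc['Below200'] / low_inc['Total']
    # 1 where at least half of households are low income, else 0
    low_inc['low_inc_tract'] = (low_inc['% low inc'] >= income_threshold).astype(int)
    newdf = pd.merge(df, low_inc[['GEOID10', 'low_inc_tract']], on='GEOID10', how='left')
    return newdf[newdf['low_inc_tract'] == 1].parcelid.values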
def preprocess_greyc_nislab(in_file, out_file):
    """
    Preprocess the raw GREYC NISLAB dataset
    """
    df = pd.concat([pd.read_excel(in_file, sheetname=0),
                    pd.read_excel(in_file, sheetname=1),
                    pd.read_excel(in_file, sheetname=2),
                    pd.read_excel(in_file, sheetname=3),
                    pd.read_excel(in_file, sheetname=4)])

    df = df[df['Class'] == 2]

    df['age'] = (df['Age'] < 30).map({True: '<30', False: '>=30'})
    df['gender'] = df['Gender'].map({'F': 'female', 'M': 'male'})
    df['handedness'] = df['Handedness'].map({'L': 'left', 'R': 'right'})
    df['session'] = np.arange(len(df))

    df['password'] = df['Password'].map({
        'leonardo dicaprio': 1,
        'the rolling stones': 2,
        'michael schumacher': 3,
        'red hot chilli peppers': 4,
        'united states of america': 5,
    })

    def preprocess_row(idx_row):
        idx, row = idx_row
        keyname = list(map(lambda x: 'space' if x == ' ' else x, list(row['Password'])))
        v = np.array(row['Keystroke Template Vector'].strip().split()).astype(int) // 10000

        s = len(keyname) - 1
        pp, rr, pr, rp = [v[s * i:s * (i + 1)] for i in range(4)]

        timepress = np.r_[0, pp].cumsum()

        # Offset the first release time by the duration of the first key
        timerelease = np.r_[rp[0] - rr[0], rr].cumsum()

        # There are ~180 rows where timerelease == timepress.
        # Fix these by assuming at least the minimum standard clock resolution
        timerelease[timerelease == timepress] += 16
        sample = pd.DataFrame.from_items([
            ('user', row['User_ID']),
            ('session', row['session']),
            ('password', row['password']),
            ('age', row['age']),
            ('gender', row['gender']),
            ('handedness', row['handedness']),
            ('timepress', timepress),
            ('timerelease', timerelease),
            ('keyname', keyname)
        ])

        return sample

    df = pd.concat(map(preprocess_row, df.iterrows()))
    df = df.set_index(['user', 'session'])[COLS]
    df = remove_repeated_keys(df)
    df.to_csv(out_file)
    return
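# pandas.DataFrame.from_items (used above) was removed in pandas 1.0. On current
# pandas the same column-ordered frame can be built with the plain dict constructor,
# which keeps insertion order and broadcasts scalars; a small illustrative sketch:
import numpy as np
import pandas as pd

frame = pd.DataFrame({
    'user': 7,                              # scalar, repeated for every row
    'timepress': np.array([0, 95, 210]),    # one value per key press
    'keyname': ['p', 'a', 'space'],
})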
Example #6
	def labelspiezo(self) :
		"""
		Uses two sets of labels from EEG scorers to keep
		only those segments corresponding to agreed-upon
		scores 
		"""
		# First get file names
		# remove the '.mat' extension (str.strip would strip characters, not the suffix)
		lbls1name = self.filename.replace('.mat', '') + '.xls'
		lbls2name = self.filename.replace('.mat', '') + '_2.xls'

		# Import scores as dataframes
		lbls1 = pd.read_excel(self.filepath+lbls1name, header = None)
		lbls2 = pd.read_excel(self.filepath+lbls2name, header = None)

		# Concatenate into same dataframe and keep segments where equal
		concatted = pd.concat([lbls1[0],lbls2[0]],1)
		concatted.columns = ['scorer1','scorer2']
		scoredf = concatted[concatted['scorer1']==concatted['scorer2']]

		# scoredf is a dataframe with indices corresponding to the piezo
		# segments where there is agreement, and the identical labels in
		# each column

		# first reshape the piezo
		npr = np.reshape(self.piezo,[len(self.piezo)//(self.fs*4),self.fs*4])

		# this single function slices the reshaped piezo matrix such that
		# it retains only segments where doublescored
		self.piezomat = npr[scoredf.index]
		# as_matrix ensures indices are not saved since we need only labels
		self.labels = scoredf['scorer1'].as_matrix()
def vadir_get_cnames_replace(df_list, df_to_use):
    """
    This function determines the column differences between each
    of the excel files passed in.
    INPUT: list of excel files to import and the file with the
           right column names to use to compare against
    OUTPUT: dictionary of excel files as keys and list of unmatched
            columns as values of the dictionary
    """
    columns_to_use = []
    other_columns = {}
    unmatched_c = {}
    for df in df_list:
        if df == df_to_use:
            df_import = pd.read_excel(df)
            c_row = vadir_column_data_row(df_import)
            columns_to_use = vadir_clean_cnames(df_import, c_row)
            unmatched_c[df] = columns_to_use
        else:
            df_import = pd.read_excel(df)
            c_row = vadir_column_data_row(df_import)
            other_columns[df] = vadir_clean_cnames(df_import, c_row)
    for df, columns in other_columns.items():
        unmatched_c[df] = [c for c in columns if c not in columns_to_use]
    return unmatched_c
Example #8
    def download_iter(self, file, save_path="."):
        file_path = os.sep.join([save_path, file])

        if not os.path.exists(save_path):
            os.mkdir(save_path)

        with open(file_path, "wb") as cache:
            try:
                self.retrbinary("RETR %s" % file, cache.write)
            except:
                yield "", "", pandas.DataFrame(),False
                return

        if not zipfile.is_zipfile(file_path):
            ef = pandas.ExcelFile(file_path)
            yield file, ef.sheet_names[0], pandas.read_excel(ef),False
            return

        with zipfile.ZipFile(file_path, "r") as zip:
            xlss = []
            sheet_name = ""
            for name in zip.namelist():
                ef = pandas.ExcelFile(zip.open(name))
                sheet_name = ef.sheet_names[0]
                xls = pandas.read_excel(ef)
                yield name, sheet_name, xls,True
                xlss.append(xls)
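# A hypothetical usage sketch for the generator above, assuming the enclosing class
# mixes in ftplib.FTP (it calls self.retrbinary) and 'client' is a connected instance:
#
#     for name, sheet, frame, from_zip in client.download_iter("data.zip", save_path="cache"):
#         print(name, sheet, frame.shape, from_zip)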
Example #9
def qmflt(name="qfl.xlsx", Width=1, Color='k'):

    # Read the data from CSV or Excel, depending on the file extension.
    if "csv" in name:
        QmFLtRaw = pd.read_csv(name)
    elif "xlsx" in name:
        QmFLtRaw = pd.read_excel(name)
    qmfltline(Width, Color)
    Points = len(QmFLtRaw)
    for i in range(Points):
        q = QmFLtRaw.at[i, 'Qm']
        f = QmFLtRaw.at[i, 'F']
        l = QmFLtRaw.at[i, 'Lt']

        Q = 100 * q / (q + f + l)
        F = 100 * f / (q + f + l)
        L = 100 * l / (q + f + l)

        x = Q / 2 + (100 - Q) * L / (L + F)
        y = Q / 2 * math.sqrt(3)

        plotpoint(x, y, QmFLtRaw.at[i, 'Size'], QmFLtRaw.at[i, 'Color'], QmFLtRaw.at[i, 'Alpha'],
                  QmFLtRaw.at[i, 'Marker'])
    plt.savefig("QmFLt-Plot.png", dpi=600)
    plt.savefig("QmFLt-Plot.svg", dpi=600)
    plt.show()
Example #10
def main(args):
    #Load the answer key
    #Answer key must have the headings ['Problem', 'Your Answer', 'Answer Format']
    answer_df = pd.read_excel(args.answer_key, sheet_name=0)

    #Score
    scores = {}

    #Go through the individual sheets
    for student_answer in glob.glob(os.path.join( args.assign_dir, '*xlsx') ):
        print(student_answer)
        student_df = pd.read_excel(student_answer, sheet_name=0)
        #Check to make sure that the column headings are equal
        if (student_df.columns != answer_df.columns).any():
            print('ERROR with: %s' % student_answer)
        else:
            #Proceed with grading
            equal = (answer_df['Your Answer'].str.lower() == student_df['Your Answer'].str.lower())
            #Count all the false values
            eqval = equal.value_counts()
            #Pull the students name
            path, fname = os.path.split(student_answer)
            student_name = fname.split('_')[0]
            #True grade set
            scores[student_name] = eqval[eqval.index == True].values[0]

    #Sort and print a csv
    with open('exam_scores.csv', 'w') as wfile:
        print('Student,Score', file=wfile)

        for sname in sorted( list(scores.keys()) ):
            print( '%s,%d' % (sname, scores[sname]), file=wfile)
Example #11
def get_from_excel(data_path, extra_sheet=None):
    '''
    This opens a file dialog allowing you to select an excel file containing
    the tracked data, and returns a :class:`CellCluster` object.

    Parameters
    ----------

    data_path: the path to the excelTM file

    Returns
    -------

    cellcluster : a :class:`CellCluster` instance
         the container class for the tracking

    Notes
    -----

    The excel file should follow the structure of `excel_trajs_example.xlsx`
    in the project's `data` directory
    '''

    ### Read the data
    trajs = pd.read_excel(data_path, 0)
    trajs.t_stamp = trajs.t_stamp.astype(np.int)
    trajs.label = trajs.label.astype(np.int)
    trajs.set_index(['t_stamp', 'label'],
                    inplace=True)

    ### The Trajectories class is a subclass of
    ### pandas DataFrame
    ### Parsing excel files tends to add NaNs to the data
    trajs = Trajectories(trajs.dropna().sortlevel())
    metadata = pd.read_excel(data_path, 1)
    metadata = {name: value for name, value
                in zip(metadata['Name'], metadata['Value'])}

    metadata['FileName'] = data_path
    store_path = metadata['FileName']
    if '.' in store_path[-6:]:
        store_path = ''.join(store_path.split('.')[:-1]+['.h5'])
    else:
        store_path = store_path+'.h5'
    store_path = os.path.join(
        os.path.dirname(data_path), store_path)

    ### The ObjectsIO class
    objectsio = ObjectsIO(metadata=metadata, store_path=store_path)
    cellcluster = CellCluster(objectsio=objectsio)
    cellcluster.trajs = trajs
    cellcluster.oio['trajs'] = trajs
    if extra_sheet is not None:
        try:
            extra = pd.read_excel(data_path, extra_sheet)
            cellcluster.extra = extra
            cellcluster.oio['extra'] = extra
        except:
            print('Extra data from sheet {} not found in the file {}'.format(extra_sheet, data_path))
    return cellcluster
Example #12
def extract_bloomberg_excel(str_bbDataFile, str_bbIndexFile,is_excel):
    '''
    Convert the Excel file downloaded from Bloomberg into a dataframe and save it
    :param str_bbDataFile: the actual data file
    :param str_bbIndexFile: the metadata (index) file
    '''
    
    global df_bbData, df_bbDataCol
    
    if(is_excel):
        # the data
        df_bbData = pd.read_excel(str_bbDataFile,'Sheet1')
        df_bbData = df_bbData.ix[5:,:] # drop the title rows and rows without dates
        df_bbData = df_bbData.replace('#N/A N/A','') # remove the placeholder string Excel leaves in empty cells
        df_bbData = df_bbData.convert_objects(convert_numeric=True) # convert every column to numeric
        
        # the index list
        df_bbIndex = pd.read_excel(str_bbIndexFile, 'index')
        df_bbIndex.columns = ['no','idx','cat','rgn','rgn2','rmk','undf']
        df_bbDataCol = df_bbIndex[df_bbIndex['no'].isin(df_bbData.columns)][['no','idx','rgn2']]
        
        # save as CSV
        df_bbData.to_csv('../data/DailyEconomicData.csv',sep='\t',encoding='utf-8')
        df_bbDataCol.to_csv('../data/index.csv',sep='\t',encoding='utf-8')
    else:
        df_bbData = pd.read_csv('../data/DailyEconomicData.csv',sep='\t',encoding='utf-8')
        df_bbDataCol = pd.read_csv('../data/index.csv',sep='\t',encoding='utf-8')
Example #13
def xl_to_df(directory, file_dict):

	# Get excel file

	file_path = ''

	for file in file_dict:
		if not file['type'] == 'questions':
			file_path = str(directory) + '\\' + file['name']

		else:
			file_path2 = str(directory) + '\\' + file['name']

	main1 = pd.read_excel(file_path, pd.ExcelFile(file_path).sheet_names[0], encoding='utf-8')
	main2 = pd.read_excel(file_path2, pd.ExcelFile(file_path2).sheet_names[0], encoding='utf-8')

	# xls_file = pd.ExcelFile(file_path)
	# main1 = xls_file.parse( xls_file.sheet_names[0] )

	# xls_file2 = pd.ExcelFile(file_path2)
	# main2 = xls_file2.parse( xls_file2.sheet_names[0] )

	full_df = main1.append(main2)

	return full_df
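# DataFrame.append (used above) was removed in pandas 2.0; on current pandas the two
# sheets are stacked with pd.concat instead. A tiny self-contained sketch:
import pandas as pd

main1 = pd.DataFrame({'q': [1, 2]})
main2 = pd.DataFrame({'q': [3]})
full_df = pd.concat([main1, main2])  # equivalent of main1.append(main2)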
Example #14
File: views.py Project: Mihkorz/AMD
 def get_context_data(self, **kwargs):
     context = super(DocumentDetail, self).get_context_data(**kwargs)
     
     filename = settings.MEDIA_ROOT+"/"+self.object.document.name 
     if self.object.doc_type == 1:
         sniffer = csv.Sniffer()
         dialect = sniffer.sniff(open(filename, 'r').read(), delimiters='\t,;') # defining the separator of the csv file
         df = read_csv(filename, delimiter=dialect.delimiter)
         context['input'] = df[:50].to_html()
     else:
         # Render each expected sheet to HTML if it is present in the workbook.
         for sheet in ("PMS", "PMS1", "DS1", "DS2"):
             try:
                 df = read_excel(filename, sheetname=sheet)
                 context[sheet] = df.to_html()
             except:
                 pass
     
     
     return context  
Example #15
def load_references(xls_filename, errors, validation_errors):
    # Output columns can be different. Update according to the rename_columns dict:
    try:
        dfs = pandas.read_excel(xls_filename,
                                [#'core-24-depts',
                                 '(reference) senior-staff-grades',
                                 '(reference) professions',
                                 '(reference) units',
                                 ])
    except XLRDError, e:
        if str(e) == "No sheet named <'(reference) units'>":
            validation_errors.append(str(e))
            return {}
        elif str(e) in ("No sheet named <'(reference) senior-staff-grades'>",
                      "No sheet named <'(reference) professions'>"):
            # this doesn't matter - we will use the standard_references
            # anyway. Read it again, just for the units.
            try:
                dfs = pandas.read_excel(xls_filename, ['(reference) units'])
            except XLRDError, e:
                if str(e) == "No sheet named <'(reference) units'>":
                    validation_errors.append(str(e))
                else:
                    errors.append(str(e))
                return {}
Example #16
def excel(FilePath, FileName, SheetNameOrNone, *args, **kwargs):
	IndexColumn = kwargs.get('IndexColumn',None)
	from pandas import read_excel
        
	if FilePath.endswith('\\'):
		lastslash=''
	else:
		lastslash='\\'
            
	if FileName.endswith('.xlsx'):
		fext=''
	else:
		fext='.xlsx'
	
	# Try the .xlsx name first; fall back to .xls if reading fails.
	try:
		fullpath=FilePath+lastslash+FileName+fext
		EmptyVar=read_excel(fullpath, SheetNameOrNone, index_col=IndexColumn, na_values=['NA'])
	except:
		fext='.xls'
		fullpath=FilePath+lastslash+FileName+fext
		EmptyVar=read_excel(fullpath, SheetNameOrNone, index_col=IndexColumn, na_values=['NA'])

	return EmptyVar
Example #17
def get_hs300s():
    """
    Get the current constituents of the CSI 300 index and their weights
    Return
    --------
    DataFrame
        code  : stock code
        name  : stock name
        date  : date
        weight: weight
    """
    try:
        df = pd.read_excel(
            ct.HS300_CLASSIFY_URL % (ct.P_TYPE["http"], ct.DOMAINS["idx"], ct.INDEX_C_COMM, ct.PAGES["hs300b"]),
            parse_cols=[0, 1],
        )
        df.columns = ct.FOR_CLASSIFY_B_COLS
        df["code"] = df["code"].map(lambda x: str(x).zfill(6))
        wt = pd.read_excel(
            ct.HS300_CLASSIFY_URL % (ct.P_TYPE["http"], ct.DOMAINS["idx"], ct.INDEX_C_COMM, ct.PAGES["hs300w"]),
            parse_cols=[0, 3, 6],
        )
        wt.columns = ct.FOR_CLASSIFY_W_COLS
        wt["code"] = wt["code"].map(lambda x: str(x).zfill(6))
        return pd.merge(df, wt)
    except Exception as er:
        print(str(er))
def get_transfert_data_frames(year=None):
    assert year is not None
    default_config_files_directory = os.path.join(
        pkg_resources.get_distribution("openfisca_france_indirect_taxation").location
    )
    matrice_passage_file_path = os.path.join(
        default_config_files_directory,
        "openfisca_france_indirect_taxation",
        "assets",
        "Matrice passage {}-COICOP.xls".format(year),
    )
    parametres_fiscalite_file_path = os.path.join(
        default_config_files_directory,
        "openfisca_france_indirect_taxation",
        "assets",
        "Parametres fiscalite indirecte.xls",
    )
    matrice_passage_data_frame = pandas.read_excel(matrice_passage_file_path)
    if year == 2011:
        matrice_passage_data_frame["poste2011"] = matrice_passage_data_frame["poste2011"].apply(
            lambda x: int(x.replace("c", "").lstrip("0"))
        )
    parametres_fiscalite_data_frame = pandas.read_excel(parametres_fiscalite_file_path, sheetname="categoriefiscale")
    selected_parametres_fiscalite_data_frame = parametres_fiscalite_data_frame[
        parametres_fiscalite_data_frame.annee == year
    ]
    return matrice_passage_data_frame, selected_parametres_fiscalite_data_frame
Example #19
    def __init__(self, file, year=None, level="Départements"):
        """
        loads the data downloaded from `data.gouv.fr <http://www.data.gouv.fr/content/search?SortBy=Pertinence&SortOrder=0&SearchText=%C3%A9lections+2012>`_.

        @param      file        xls file
        @param      year        year (optional)
        @param      level       ``Départements`` or ``Cantons``
        """
        self.year = year
        self.level = level.lower().replace("s", "")
        if isinstance(file, list):
            self.tours = file
        else:
            self.tours = [pandas.read_excel(file, sheetname="%s T1" % level),
                          pandas.read_excel(file, sheetname="%s T2" % level)]
            for i, t in enumerate(self.tours):
                if len(t) == 0:
                    raise Exception("no data for tour %d" % (i + 1))
            self.tours = [self.process_tour(_) for _ in self.tours]
            for i, t in enumerate(self.tours):
                if len(t) == 0:
                    raise Exception("no data for tour %d" % i)
            try:
                self.tours = [
                    _.sort_values("Libellé du %s" % self.level, inplace=False) for _ in self.tours]
            except Exception as e:
                message = "unable to sort, shape={1} columns={0}".format(
                    ",".join(self.tours[0].columns), self.tours[0].shape)
                raise Exception(message) from e
Example #20
def get_barres_seq_data(force=False):
    global BARRES_SPECIES_DATA

    if force or not os.path.exists(BARRES_SEQ_PATH):
        LOGGER.info("Downloading Barres RNA Seq Data")
        response = requests.get(BARRES_SEQ_URL, stream=True)
        response.raise_for_status()

        with open(BARRES_SEQ_PATH, mode="wb") as f:
            for block in response.iter_content(1024):
                f.write(block)

    LOGGER.info("Reading Barres RNA Seq Data")
    BARRES_SPECIES_DATA = {
        "Homo sapiens": pd.read_excel(
            BARRES_SEQ_PATH,
            sheet_name="Human data only",
            skiprows=[0],
        ).iloc[1:],
        "Mus musculus": pd.read_excel(
            BARRES_SEQ_PATH,
            sheet_name="Mouse data only",
            skiprows=[0],
        ),
    }
Example #21
def load_All_BAVs(BAVfile,sheet_names):
    x = pandas.read_excel(BAVfile, sheet_names[0], index_col=0, na_values=['NA']).index
    data= dict();
    for sheet in sheet_names:
        df= pandas.read_excel(BAVfile, sheet, index_col=0, na_values=['NA'])
        x = intersect(x, df.index)
        
    for sheet in sheet_names:
        df =pandas.read_excel(BAVfile, sheet, index_col=0, na_values=['NA'])        
        good_cols = [col for col in df.columns if len(col.split("_"))==2]
        df= df[good_cols]
        df.columns = map(lambda x: x.split("_")[0],df.columns)
        
        try:        
            del df[u"Tough"]
        except:
            print "oh well"
        try:        
            del df[u"Visionary"]
        except:
            print "oh well"
        df=df[pruned_words]
        df = df.ix[x]        
        data[sheet]=df

    return (x,data)
Example #22
    def test_excel_multindex_roundtrip(self, ext, c_idx_names, r_idx_names,
                                       c_idx_levels, r_idx_levels):
        # see gh-4679
        with ensure_clean(ext) as pth:
            if c_idx_levels == 1 and c_idx_names:
                pytest.skip("Column index name cannot be "
                            "serialized unless it's a MultiIndex")

            # Empty name case current read in as
            # unnamed levels, not Nones.
            check_names = r_idx_names or r_idx_levels <= 1

            df = mkdf(5, 5, c_idx_names, r_idx_names,
                      c_idx_levels, r_idx_levels)
            df.to_excel(pth)

            act = pd.read_excel(pth, index_col=list(range(r_idx_levels)),
                                header=list(range(c_idx_levels)))
            tm.assert_frame_equal(df, act, check_names=check_names)

            df.iloc[0, :] = np.nan
            df.to_excel(pth)

            act = pd.read_excel(pth, index_col=list(range(r_idx_levels)),
                                header=list(range(c_idx_levels)))
            tm.assert_frame_equal(df, act, check_names=check_names)

            df.iloc[-1, :] = np.nan
            df.to_excel(pth)
            act = pd.read_excel(pth, index_col=list(range(r_idx_levels)),
                                header=list(range(c_idx_levels)))
            tm.assert_frame_equal(df, act, check_names=check_names)
Example #23
    def test_excel_passes_na(self, read_ext):

        excel = ExcelFile('test4' + read_ext)

        parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False,
                               na_values=['apple'])
        expected = DataFrame([['NA'], [1], ['NA'], [np.nan], ['rabbit']],
                             columns=['Test'])
        tm.assert_frame_equal(parsed, expected)

        parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True,
                               na_values=['apple'])
        expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']],
                             columns=['Test'])
        tm.assert_frame_equal(parsed, expected)

        # 13967
        excel = ExcelFile('test5' + read_ext)

        parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False,
                               na_values=['apple'])
        expected = DataFrame([['1.#QNAN'], [1], ['nan'], [np.nan], ['rabbit']],
                             columns=['Test'])
        tm.assert_frame_equal(parsed, expected)

        parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True,
                               na_values=['apple'])
        expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']],
                             columns=['Test'])
        tm.assert_frame_equal(parsed, expected)
Example #24
 def test_read_excel_nrows(self, read_ext):
     # GH 16645
     num_rows_to_pull = 5
     actual = pd.read_excel('test1' + read_ext, nrows=num_rows_to_pull)
     expected = pd.read_excel('test1' + read_ext)
     expected = expected[:num_rows_to_pull]
     tm.assert_frame_equal(actual, expected)
Example #25
 def test_read_excel_nrows_greater_than_nrows_in_file(self, read_ext):
     # GH 16645
     expected = pd.read_excel('test1' + read_ext)
     num_records_in_file = len(expected)
     num_rows_to_pull = num_records_in_file + 10
     actual = pd.read_excel('test1' + read_ext, nrows=num_rows_to_pull)
     tm.assert_frame_equal(actual, expected)
Example #26
    def test_reader_dtype(self, read_ext):
        # GH 8212
        basename = 'testdtype'
        actual = pd.read_excel(basename + read_ext)

        expected = DataFrame({
            'a': [1, 2, 3, 4],
            'b': [2.5, 3.5, 4.5, 5.5],
            'c': [1, 2, 3, 4],
            'd': [1.0, 2.0, np.nan, 4.0]}).reindex(
                columns=['a', 'b', 'c', 'd'])

        tm.assert_frame_equal(actual, expected)

        actual = pd.read_excel(basename + read_ext,
                               dtype={'a': 'float64',
                                      'b': 'float32',
                                      'c': str})

        expected['a'] = expected['a'].astype('float64')
        expected['b'] = expected['b'].astype('float32')
        expected['c'] = ['001', '002', '003', '004']
        tm.assert_frame_equal(actual, expected)

        with pytest.raises(ValueError):
            pd.read_excel(basename + read_ext, dtype={'d': 'int64'})
Example #27
    def test_excel_read_buffer(self, read_ext):

        pth = 'test1' + read_ext
        expected = pd.read_excel(pth, 'Sheet1', index_col=0)
        with open(pth, 'rb') as f:
            actual = pd.read_excel(f, 'Sheet1', index_col=0)
            tm.assert_frame_equal(expected, actual)
Example #28
	def __init__(self):
		## PDZ Domains
		temp_df = pd.read_excel(DATA+'\\theta_data.xlsx')
		self.aminoacids = [acid.encode('utf-8') for acid in list(temp_df.columns[:20])]
		self.df = temp_df.T
		self.domains = [Domain(domain.encode('utf-8')) for domain in list(self.df.columns)]
		self.domain_names = [domain.name for domain in self.domains]
		### Peptide sequences
		self.pep_seqs = []
		self.pep_names = []
		self.acid_names = ['Glycine', 'Alanine', 'Valine', 'Leucine', 'Isoleucine', 'Methionine', 'Proline', 'Phenylalanine', 'Tryptophan', 'Serine', \
		              'Threonine', 'Asparagine', 'Glutamine', 'Tyrosine', 'Cysteine', 'Lysine', 'Arginine', 'Histidine', 'Aspartate', 'Glutamate']
		self.acid_dict = {self.aminoacids[i]:self.acid_names[i] for i in range(len(self.aminoacids))}
		with open(DATA+'\\peptides.free') as f:
			for line in f:
				x = line.split()
				self.pep_seqs.append(x[1])
				self.pep_names.append(x[0])
		self.peptides = [Peptide(name) for name in self.pep_names]

		## Interaction: Which peptides bind to which domains
		self.fp_interaction_matrix = pd.read_excel(DATA+"\\fp_interaction_matrix.xlsx")
		for column in self.fp_interaction_matrix.columns:
			self.fp_interaction_matrix.loc[self.fp_interaction_matrix[column] == 0.0, column] = -1.0
		self.fp_interaction_matrix = self.fp_interaction_matrix.rename(columns = lambda x: str(x).replace(" ", ""))

		## Classification matrix
		self.class_matrix = np.zeros((2,2))
		self.class_matrix[0,0] = 0.85
		self.class_matrix[0,1] = 0.04
		self.class_matrix[1,0] = 0.15
		self.class_matrix[1,1] = 0.96
Example #29
 def __init__(self, db_filename = "fbo_solicitations.xlsx",
              report_prefix = "report", 
              sol_sheet_name = "solicitations",
              filtered_sheet_name = "filtered_solicitations",
              index_column = "sponsor_number",
              report_only_new = True):
     '''
     Constructor
     '''
     if(not os.path.isfile(db_filename)):
         #generate a blank writable excel sheet from scratch
         field_names = [field_name for field_name in Opportunity.fields]
         field_names.remove("filtered")
         writer = ExcelWriter(db_filename)
         sol_df = pd.DataFrame(columns = field_names)
         filtered_df = pd.DataFrame(columns = field_names)
         sol_df.to_excel(writer,sol_sheet_name)
         filtered_df.to_excel(writer,filtered_sheet_name)
         writer.save()
         writer.close()
     
     self.report_filename = (report_prefix + "_" 
                             + str(datetime.today())[:19]
                             .replace(":","_").replace(" ","[") + "].xlsx")
     #kept for posterity, in case only the date component is needed and we don't care about overwrites
     #self.report_filename = report_prefix + "_" + str(date.today())
     self.db_filename = db_filename
     self.sol_sheet_name = sol_sheet_name
     self.filtered_sheet_name = filtered_sheet_name
     self.sol_df = pd.read_excel(db_filename,sol_sheet_name, index_col = index_column)
     self.filtered_df = pd.read_excel(db_filename,filtered_sheet_name, index_col = index_column)
     self.usaved_sol_counter = 0
     self.sol_counter = 0
     self.added_items = set()
Example #30
def main(left_file="",
         right_file="",
         out_file="",
         on=[],
         how="left"):
    """given two xlsx (excel) workbooks, each containing one worksheet,
join the two worksheets and output a new workbook.

Parameters:

* `-l, --left-file`: The workbook which contains the worksheet
to consider the "left" table
* `-r, --right-file`: The workbook which contains the worksheet
to consider the "right" table
* `-o, --out-file`: The file to output the "joined" tables to
* `-O, --on`: A (space-separated) list of column names to join on
* `-H, --how`: how to join the two tables, must be one of "left",
"right", "outer" or "inner"

For more information on joining tables please see the
[pandas dataframe merge documentation](http://pandas.pydata.org/pandas-docs/version/0.17.1/generated/pandas.DataFrame.merge.html)
    """
    left = pd.read_excel(left_file)
    right = pd.read_excel(right_file)
    new = pd.merge(left, right, on=on, how=how)
    print "SAVING {}".format(out_file)
    new.to_excel(out_file, index=False)
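# A hypothetical direct call, equivalent to running the CLI described in the
# docstring with -l/-r/-o/-O/-H (the file names and join column are placeholders):
#
#     main(left_file="left.xlsx", right_file="right.xlsx",
#          out_file="joined.xlsx", on=["id"], how="inner")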
Example #31
df["grade"].cat.categories = ["very good", "good", "very bad"]
df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium",\
                                              "good", "very good"])
print df["grade"]
print df.sort_values(by="grade")
print df.groupby("grade").size()
''' Plotting '''
ts = pd.Series(np.random.randn(1000),\
               index=pd.date_range('1/1/2000', periods=1000))
ts = ts.cumsum()
ts.plot()
df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index,\
                  columns=['A', 'B', 'C', 'D'])
df = df.cumsum()
plt.figure()
df.plot()
plt.legend(loc='best')
''' Getting Data In/Out '''
# CSV
df.to_csv('foo.csv')
print pd.read_csv('foo.csv')

# HDF5
df.to_hdf('foo.h5', 'df')
print pd.read_hdf('foo.h5', 'df')

# Excel
df.to_excel('foo.xlsx', sheet_name='Sheet1')
print pd.read_excel('foo.xlsx', 'Sheet1', index_col=None, na_values=['NA'])
''' Gotchas '''
os.chdir("/jukebox/wang/zahra/python/BrainPipe")
from tools.analysis.network_analysis import make_structure_objects

#set appropriate pth
src = "/jukebox/wang/zahra/kelly_cell_detection_analysis"
erode_pth = os.path.join(src, "annotation_allen_2017_25um_sagittal_erode_80um.tif")
dilate_pth = os.path.join(src, "dilated_atlases")

fig_dst = "/home/wanglab/Desktop"
df_pth = "/jukebox/LightSheetTransfer/atlas/allen_atlas/allen_id_table_w_voxel_counts_16bit.xlsx"
ann_pth = "/jukebox/LightSheetTransfer/atlas/allen_atlas/annotation_2017_25um_sagittal_forDVscans_16bit.tif"

#%%
#read vols
ann = sitk.GetArrayFromImage(sitk.ReadImage(ann_pth))
df = pd.read_excel(df_pth)
er_ann = tifffile.imread(erode_pth)
dl_anns = [os.path.join(dilate_pth, xx) for xx in os.listdir(dilate_pth)]

org_iids = np.unique(ann)[1:] #excluding 0
er_iids = np.unique(er_ann)[1:]

missing = [iid for iid in org_iids if iid not in er_iids]

missing_struct_names = [nm for nm in df.name.values if df.loc[df.name == nm, "id"].values[0] in missing] #excluding root
missing_struct_voxels = [df.loc[df.name == nm, "voxels_in_structure"].values[0] for nm in missing_struct_names]
#replace id column that matches to names
missing_struct_ids = [df.loc[df.name == nm, "id"].values[0] for nm in missing_struct_names]

#get parent names
missing_struct_parents = [df.loc[df["id"] == iid, "parent_name"].values[0]
# Import Built-Ins
import logging
# Import Third-Party
# Import Homebrew
import matplotlib.pyplot as plt

plt.style.use('bmh')
# Init Logging Facilities
log = logging.getLogger(__name__)
#################################################################
# 1- Load data
indicators_value = []
ticker_name = []
glob.glob("D:\Stock Study Excel Files\Input Excel Files\Stock USA\*.xlsx")
for f in glob.glob('D:\Stock Study Excel Files\Input Excel Files\Stock USA\*.xlsx'):
    df = pd.read_excel(f)
   # df.columns = map(str.capitalize, df.columns)
    #df.rename(columns={'Volume': 'Volume_BTC'}, inplace=True)
    tike = f.split('\\')[-1].split('.')[0]
    print(tike)
    df.insert(1, 'TICKER', tike)  # to bring excel file name
    # Clean nan values
    df = ta.utils.dropna(df)
    ####################################################################
    # 2-Add all ta features filling nans values (from Ta-Lib Except SuperTrend Because not in Ta-Lib)
    df = ta.add_all_ta_features(df, "Open", "High", "Low", "Close", "Volume_BTC", fillna=True)

    #####################################################################
    # 3- Calculate
    df['Signal'] = 0
    sell = []
import matplotlib.pyplot as plt
import pandas as pd
url = 'https://s3.amazonaws.com/assets.datacamp.com/production/course_1606/datasets/winequality-red.csv'
df = pd.read_csv(url,sep=";")
print(df.head())
pd.DataFrame.hist(df.ix[:, 0:1])
plt.xlabel('fixed acidity (g(tartaric acid)/dm$^3$)')
plt.ylabel('count')
plt.show()

#Importing non-flat files from the web

import pandas as pd
url = 'http://s3.amazonaws.com/assets.datacamp.com/course/importing_data_into_r/latitude.xls'
xls = pd.read_excel(url,sheet_name=None)
print(xls.keys())
print(xls['1700'].head())

#Performing HTTP requests in Python using urllib

from urllib.request import urlopen,Request
url = "http://www.datacamp.com/teach/documentation"
request = Request(url)
response = urlopen(request)
print(type(response))
response.close()

#Printing HTTP request results in Python using urllib

from urllib.request import urlopen, Request
Example #35
from pprint import pprint
import os
import json
import csv

import pandas as pd  # needed for pd.read_excel below



from flask import (
    Flask,
    render_template,
    jsonify,
    request,
    redirect,
    url_for,)

dataset1 = pd.read_excel("BronxPropertySalesDatasets/sales_bronx_03.xls")


app = Flask (__name__)

SITE_ROOT = os.path.realpath(os.path.dirname(__file__))
json_url = os.path.join(SITE_ROOT, "data", "data.json")
data = json.load(open(json_url))


AIRBNB_SITE_ROOT = os.path.realpath(os.path.dirname(__file__))
airbnb_json_url = os.path.join(AIRBNB_SITE_ROOT, "data", "airbnb_data.json")
data_airbnb = json.load(open(airbnb_json_url))


PROPERTYSALES_SITE_ROOT = os.path.realpath(os.path.dirname(__file__))
Example #36
def main():
    import cdsapi
    import numpy as np
    import os
    import pandas as pd
    import math

    def quarter_up(x):
        return math.ceil(x * 4) / 4

    def quarter_down(x):
        return math.floor(x * 4) / 4

    c = cdsapi.Client()

    file = '/Volumes/Neely/BioDAR/ERA5/sites of light and suction traps.xlsx'
    suction_traps = pd.read_excel(file, header=0, sheet_name='Suction traps')
    number_of_traps = len(suction_traps['Lat'])
    areas = []
    trap_name = []

    for a in range(0, number_of_traps):
        lats = [
            quarter_up(suction_traps['Lat'][a]),
            quarter_down(suction_traps['Lat'][a])
        ]
        longs = [
            quarter_up(suction_traps['Long'][a]),
            quarter_down(suction_traps['Long'][a])
        ]
        areas.append([
            max(lats),
            min(longs),
            min(lats),
            max(longs),
        ])
        trap_name.append(suction_traps['Trap name'][a].replace(" ", "_"))

    start_year = 1979
    stop_year = 2020

    years = np.arange(start_year, stop_year + 1)
    months = [
        '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'
    ]
    days = [
        '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12',
        '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24',
        '25', '26', '27', '28', '29', '30', '31'
    ]

    for year in years:
        for month in months:
            for day in days:
                for idx, area in enumerate(areas):
                    try:
                        outdir = '/Volumes/Neely/BioDAR/ERA5/Myrna_TrapLocations_0_25_Box/suction_traps/pres_levels/' \
                                 + str(trap_name[idx]) + '/'
                        if not os.path.exists(outdir):
                            os.makedirs(outdir)
                        file_name = outdir + 'era5_pres_level_' + str(trap_name[idx]) + '_' + \
                                    str(year) + str(month) + str(day) + '.nc'
                        print(str(trap_name[idx]), area)

                        print(file_name)

                        if os.path.isfile(file_name) == True:
                            print('exists')
                            continue

                        else:
                            c.retrieve(
                                'reanalysis-era5-pressure-levels', {
                                    'product_type':
                                    'reanalysis',
                                    'format':
                                    'netcdf',
                                    'variable': [
                                        'divergence',
                                        'fraction_of_cloud_cover',
                                        'geopotential',
                                        'ozone_mass_mixing_ratio',
                                        'potential_vorticity',
                                        'relative_humidity',
                                        'specific_cloud_ice_water_content',
                                        'specific_cloud_liquid_water_content',
                                        'specific_humidity',
                                        'specific_rain_water_content',
                                        'specific_snow_water_content',
                                        'temperature',
                                        'u_component_of_wind',
                                        'v_component_of_wind',
                                        'vertical_velocity',
                                        'vorticity',
                                    ],
                                    'pressure_level': [
                                        '1',
                                        '2',
                                        '3',
                                        '5',
                                        '7',
                                        '10',
                                        '20',
                                        '30',
                                        '50',
                                        '70',
                                        '100',
                                        '125',
                                        '150',
                                        '175',
                                        '200',
                                        '225',
                                        '250',
                                        '300',
                                        '350',
                                        '400',
                                        '450',
                                        '500',
                                        '550',
                                        '600',
                                        '650',
                                        '700',
                                        '750',
                                        '775',
                                        '800',
                                        '825',
                                        '850',
                                        '875',
                                        '900',
                                        '925',
                                        '950',
                                        '975',
                                        '1000',
                                    ],
                                    'year': [str(year)],
                                    'month': [month],
                                    'day': [day],
                                    'time': [
                                        '00:00',
                                        '01:00',
                                        '02:00',
                                        '03:00',
                                        '04:00',
                                        '05:00',
                                        '06:00',
                                        '07:00',
                                        '08:00',
                                        '09:00',
                                        '10:00',
                                        '11:00',
                                        '12:00',
                                        '13:00',
                                        '14:00',
                                        '15:00',
                                        '16:00',
                                        '17:00',
                                        '18:00',
                                        '19:00',
                                        '20:00',
                                        '21:00',
                                        '22:00',
                                        '23:00',
                                    ],
                                    'area':
                                    area,
                                }, file_name)
                    except:
                        continue
Example #37
def convert_xlsx(filename, sheetname, csv_name):
    data_xls = pandas.read_excel(filename, sheetname, convert_float=False, index_col=None)
    data_xls.to_csv(csv_name, encoding='utf-8')
Example #38
def xl2csv(path):
    df = pandas.read_excel(path)
    csvfileloc = '/home/py01/Desktop/ratings5.csv'
    df.to_csv(csvfileloc, sep='\t', encoding='utf-8', index=False)
    os.remove(path)
            items = [int(item) for item in items]
            df[col] = pd.Series(items, dtype=int)
        else:
            df[col] = pd.Series(items, dtype=float)

##############################################################################
################################### MAIN #####################################
##############################################################################

if __name__ == '__main__':
    # make output directory if not exists
    cwd = os.getcwd()
    if not cwd.endswith('/'): 
        cwd += '/'
    if not OUTDIR.endswith('/'): 
        OUTDIR += '/'
    try:
        os.mkdir(OUTDIR)
        print('Output folder created: %s' % (cwd + OUTDIR))
    except:
        pass

    # iterate through each sheet and format / spit out CSV for D3
    for i in range(len(SHEET_NAMES)):
        print('\nWorking on %s sheet' % SHEET_NAMES[i])
        df = pd.read_excel(INFILE, sheet_name=i+1, dtype=str, na_filter=False)
        remove_trailing_whitespace(df)
        typify_dataframe(df)
        df.to_csv(OUTDIR + SHEET_NAMES[i] + '.csv', index=None)
        print('Saved to %s' % (cwd + OUTDIR + SHEET_NAMES[i] + '.csv'))
import warnings
warnings.filterwarnings("ignore")




# Get dataset and features
#==============================#

aalist = list('ACDEFGHIKLMNPQRSTVWY')
def getAAC(seq):
    aac = np.array([seq.count(x) for x in aalist])/len(seq)
    return aac

data = pd.read_excel('sequence_ogt_topt.xlsx', index_col=0)
aac = np.array([getAAC(seq) for seq in data['sequence']])
ogt = data['ogt'].values.reshape((data.shape[0],1))
X = np.append(aac, ogt, axis=1)
sc = StandardScaler()
X = sc.fit_transform(X)
y = data['topt'].values



# Strategies and hyperparameters
#======================================#

# Hyperparameter range
cl_vals = [25.0, 30.0, None]
ch_vals = [72.2, 60.0]
import pandas as pd
from sklearn.neural_network import MLPClassifier
import numpy as np
import json
import scipy as sc

train_file=pd.read_json(r'D:\Data Analysis\Project2\Project2\Data-Analysis-Project2\train.json', orient='records')
test_file=pd.read_csv(r'D:\Data Analysis\Project2\Project2\Data-Analysis-Project2\sample_solution.csv', header=0)    
train_inp=pd.read_excel(r'D:\Data Analysis\Project2\Project2\Data-Analysis-Project2\Train_Input.xlsx',index_col=None,header=None)
test_inp=pd.read_excel(r'D:\Data Analysis\Project2\Project2\Data-Analysis-Project2\Test_Input.xlsx',index_col=None,header=None)
train_l=train_file['cuisine']
test_l=test_file['cuisine']
train_label=train_l[0:4000]
test_label=test_l[0:1000]




best=0
accuracy=0
hl=[1,3]
act=['logistic', 'tanh', 'relu']
sol=['lbfgs','sgd','adam']
al=[0.0001,0.0005]
bs=[64,128]
lr=['constant','invscaling','adaptive']
best_params = [0,0,0,0,0,0]
params = [0,0,0,0,0,0]
for h in hl:
    for a in act:
import pandas as pd
import sys

input_file = sys.argv[1]
output_file = sys.argv[2]

data_frame = pd.read_excel(input_file, sheet_name='january_2013')

writer = pd.ExcelWriter(output_file)
data_frame.to_excel(writer, sheet_name='jan_13_output', index=False)
writer.save()
Example #43
                  category_compare, padding)
from BusinessPulseSurvey import business_pulse, qa_for_loc, qa_by_loc, compare_questions_locations, stacked_by_loc, qa_diff_by_loc

dir_path = os.path.dirname(os.path.abspath(__file__))

if not os.path.exists(config.log_dir):
    os.makedirs(config.log_dir)
if not os.path.exists(config.log_dir + config.log_file):
    with open(config.log_dir + config.log_file, 'w+'):
        pass
logging.basicConfig(filename=config.log_dir + config.log_file,
                    level=logging.INFO)
logging.info('%s Economic Dashboard Started', datetime.datetime.now())

PUA_url = 'https://oui.doleta.gov/unemploy/docs/weekly_pandemic_claims.xlsx'
pua_data = pd.read_excel(PUA_url)
fileloc = config.fileloc

y2k = '2000-01-01'
cy = '2020-01-01'
rs = '2020-02-01'  #Recession start date


#%% Overall Trends
def overall_trends():
    logging.info('%s Overall Trends Started', datetime.datetime.now())
    series = ['RSAFS', 'IPMAN', 'PAYEMS', 'DGORDER']
    national_trends = fred_chart(series,
                                 '2019-01-01',
                                 transformation='index',
                                 transform_date=rs,
Example #44
def second(batch):
   
    index_col_2_yr="B.TECH. II Yr.(III SEMESTER TIMETABLE) ODD SEMESTER 2018(Combined) JIIT128(Effective from 17/07/2018)"

    data=pd.read_excel("timetable2.xlsx", index_col=index_col_2_yr)
    #sperating cols

    data.columns=[1,2,3,4,5,6,7,8,9]

    data.columns.name=" "

    #seprating days

    mon=data.loc["MON":"TUE"].iloc[:-1]
    tue=data.loc["TUE":"WED"].iloc[:-1]
    wed=data.loc["WED":"THURS"].iloc[:-1]
    thu=data.loc["THURS":"FRI"].iloc[:-1]
    fri=data.loc["FRI":"SAT"].iloc[:-1]
    sat=data.loc["SAT":].iloc[:-1]



    #list of df
    data2=[mon,tue,wed,thu,fri,sat]

    final=data.dropna()
    #data2
    #final=data.dropna()
    #final
    #edit data frame here . make first row the column labels

    rows=[]



    #realgame
    #move all to class and fns

    for i in range (0,6):
        newlist=[]
        for j in range(1,10):
            new=data2[i][j].dropna()
            new2=new.str.contains(batch)
            new3=new.str.contains('ALL')
            new2=new2|new3
            if not ((new[new2]).empty):
                temp=new[new2].tolist()[0].replace("\n","")
                
                #comment to show subject code
                # temp1=temp.find('(')
                # temp2=temp.find(')')
                # temp=temp[:temp1]+temp[temp2+1:]
                #comment to show subject code
                
                temp3=temp.find('/')
                temp=temp[:temp3]
                newlist.append(temp)
                #method 2
                #newlist.append(new[new2].tolist().replace('\n',''))
            else:
                newlist.append(" ")
        rows.append(newlist)

        
    #final.append(new[new2])
    #final

    final=pd.DataFrame(rows,index=["Mon","Tue","Wed","Thu","Fri","Sat"],columns=data.iloc[0])
    final.columns.name="Days/Time"

    return(final.transpose().to_dict('list'))
Example #45
# -*- coding: utf-8 -*-
# tf_idf1
#
# test script for setup of tf-idf
#

import os
import pandas as pd
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer

#add a project name for output
project_name = 'Daily Mail Property Articles'

# read sf export
sf_export = pd.read_excel(r'C:\Users\JLee35\Automation\TF-IDF\input\body_copy.xlsx')

corpus =[]
for i in sf_export.index:
    doc = sf_export['Body Copy Full'][i]
    corpus.append(doc)

# counts the length of the list
doc_count = len(corpus)
print(f'Total number of documents = {doc_count}')

# use TfidfVectorizer from Scikit-Learn to transform the corpus
stop = stopwords.words('english')
vectorizer = TfidfVectorizer(max_df=.65, min_df=1, ngram_range=(1,1), stop_words=stop, use_idf=True, norm=None)
transformed_documents = vectorizer.fit_transform(corpus)
transformed_documents_as_array = transformed_documents.toarray()
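# A short follow-up sketch (not part of the original script): rank the highest-scoring
# terms for the first document. get_feature_names_out assumes scikit-learn >= 1.0;
# older releases use get_feature_names instead.
terms = vectorizer.get_feature_names_out()
first_doc = pd.DataFrame({'term': terms, 'score': transformed_documents_as_array[0]})
print(first_doc.sort_values('score', ascending=False).head(10))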
import numpy as np
import pandas as pd
import streamlit as st
import altair as alt


df=pd.read_excel('Base_sondage_maraichage.xlsx', index_col="Identifiant", na_values=['NA'])

df = df.fillna({"Mode_irrigation": "Pluvial"})

cleanup_nums = {

"Mode_Production":     {"Principale": 1, "En succession": 2, 
                                        "En association": 3, "Sous étage": 4},
    
"Mode_irrigation": {"Localisée": 1, "Gravitaire": 2, "Aspersion": 3,
                                    "Pivot": 4,
                                  "Gravitaire,Localisée": 5, "Localisée,Pivot": 6, "Pluvial":7},
    
"Culture": {"Courgette": 1, "Pomme de terre": 2, "Tomate": 3,
                                    "Coriandre et persil": 4,
                                  "Haricot vert": 5, "Concombre": 6,
           "Menthe": 7, "Fève vert": 8, "Aubergine": 9,
                                    "Carotte": 10,
                                  "Chou fleur": 11, "Oignon":12, "Choux vert":13, "Celeri": 14,
            "Laitue": 15, "Tomate kiwat": 16, "Fraise": 17,
                                    "Piment fort": 18,
                                  "Artichaut": 19, "Absinthe": 20,
            "Haricot Helda": 21, "Topinambour": 22, "Myrtille": 23,
                                    "Endive": 24,
                                  "Navet": 25, "Pastèque":26, "Poivron": 27},
Example #47
"""
Reference:
http://pbpython.com/market-basket-analysis.html

"""
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

def encode_units(x):
    if x <= 0:
        return 0
    if x >= 1:
        return 1

df = pd.read_excel('http://archive.ics.uci.edu/ml/machine-learning-databases/00352/Online%20Retail.xlsx')
print(df.head())

df['Description'] = df['Description'].str.strip()
df.dropna(axis=0, subset=['InvoiceNo'], inplace=True)
df['InvoiceNo'] = df['InvoiceNo'].astype('str')
df = df[~df['InvoiceNo'].str.contains('C')]


basket = (df[df['Country'] =="France"]
          .groupby(['InvoiceNo', 'Description'])['Quantity']
          .sum().unstack().reset_index().fillna(0)
          .set_index('InvoiceNo'))

basket_sets = basket.applymap(encode_units)
basket_sets.drop('POSTAGE', inplace=True, axis=1)
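# The snippet imports apriori and association_rules but is cut off before using them.
# A minimal sketch of the usual next step from the referenced tutorial, assuming the
# basket_sets frame built above (the support and lift thresholds are illustrative):
frequent_itemsets = apriori(basket_sets, min_support=0.07, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
print(rules.head())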
Example #48
# Import libraries
import plotly.offline as pyo
import plotly.graph_objs as go
import pandas as pd

# Load the data
df_temp = pd.read_excel(
    r'C:\Users\ivan_pinar\Dropbox\Creación de MOCs\MOC Dash Python\Datasets\3.8\Temperaturas.xlsx'
)

#Definición de objeto de tipo lista "data", x --> Categorízación, y --> Valores a verificar distribución
data = [
    go.Box(x=df_temp["Ciudad"], y=df_temp["T_Promedio"])
]  #pointpos=0 para ubicación de los puntos en el centro / boxpoints ="all" si se quieren visualizar todos los puntos

#Definición de objeto "layout": diseño del gráfico como título, nombres de ejes,...
layout = go.Layout(title="Box & whiskers Temperatura")

#Creación de objeto "Figure" de Plotly a partir de los objetos data y layout creados previamente
fig = go.Figure(data=data, layout=layout)
# Generate the plot from the figure and set the name of the output HTML file
pyo.plot(fig, filename="3.8 Temp_Box Plot.html")
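# Variant hinted at in the comment above, shown as an illustrative sketch with the
# same columns (the output filename is invented): draw every observation next to
# its box, centred underneath it.
data_all_points = [
    go.Box(x=df_temp["Ciudad"], y=df_temp["T_Promedio"], boxpoints="all", pointpos=0)
]
fig_all_points = go.Figure(data=data_all_points, layout=layout)
pyo.plot(fig_all_points, filename="3.8 Temp_Box Plot_all_points.html")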
print('**************************************************    Analyzing {:s}'.
      format(state.upper()))
print()

#excel = r'K:\DEEP_SOLAR_BIG2\TN\TNDeep_Solar.xlsx'
if region == TVA_l:
    #excel = r'K:\TVA_SVI\TVA_DS_SVI_merged.xlsx'
    excel = r'K:\TVA_SVI\TVA_DS_SVI_merged2.xlsx'
elif region == TVA_f:
    #excel = r'K:\TVA_SVI\TVA_DS_SVI_merged.xlsx'
    excel = TVA_path
else:
    excel = r'K:\TVA_SVI\TN_DS_SVI_merged.xlsx'
    #TNDS = pd.read_excel(excel, index='fips').fillna(0)
    #TNDS = pd.read_excel(excel, index='fips').dropna(axis=0)
TNDS = pd.read_excel(excel, index_col='fips')
#TNDS = TNDS.fillna(TNDS.mean(axis=1))

if region != TVA_l and region != TVA_f:
    print('Getting region {:s}'.format(region))
    TNDS = TNDS.loc[TNDS['ST_ABBR'] == region.upper()]

print('splitting data')
adopters, high, mod, non = split_data_res_adopt_non(TNDS,
                                                    hthr=10,
                                                    midrange=[1, 10],
                                                    lthr=1,
                                                    verbose=False)
dd.display_percentages(TNDS.shape[0],
                       adopters.shape[0],
Example #50
def disp1():
    import io
    from pdfminer.converter import TextConverter
    from pdfminer.pdfinterp import PDFPageInterpreter
    from pdfminer.pdfinterp import PDFResourceManager
    from pdfminer.layout import LAParams
    from pdfminer.pdfpage import PDFPage
    text = ''

    def extract_text_from_pdf(pdf_path):
        with open(pdf_path, 'rb') as fh:
            # iterate over all pages of PDF document
            for page in PDFPage.get_pages(fh,
                                          caching=True,
                                          check_extractable=True):
                # creating a resource manager
                resource_manager = PDFResourceManager()

                # create a file handle
                fake_file_handle = io.StringIO()

                # creating a text converter object
                converter = TextConverter(resource_manager,
                                          fake_file_handle,
                                          codec='utf-8',
                                          laparams=LAParams())

                # creating a page interpreter
                page_interpreter = PDFPageInterpreter(resource_manager,
                                                      converter)

                # process current page
                page_interpreter.process_page(page)

                # extract text
                text = fake_file_handle.getvalue()
                yield text

                # close open handles
                converter.close()
                fake_file_handle.close()

    # calling above function and extracting text
    #print(fname)
    file_path = "D:/resume_analysis/static/resumes/" + fname
    fp = file_path.split('/')
    f = fp[len(fp) - 1]
    for page in extract_text_from_pdf(file_path):
        text += ' ' + page
    #print(text)
    import spacy
    from spacy.matcher import Matcher

    # load pre-trained model
    nlp = spacy.load('en_core_web_sm')

    # initialize matcher with a vocab
    matcher = Matcher(nlp.vocab)

    def extract_name(resume_text):
        nlp_text = nlp(resume_text)

        # First name and Last name are always Proper Nouns
        pattern = [{'POS': 'PROPN'}, {'POS': 'PROPN'}]

        matcher.add('NAME', None, pattern)

        matches = matcher(nlp_text)

        for match_id, start, end in matches:
            span = nlp_text[start:end]
            return span.text

    name = extract_name(text)
    #print(name)
    import re

    def extract_mobile_number(text):
        phone = re.findall(
            re.compile(
                r'(?:(?:\+?([1-9]|[0-9][0-9]|[0-9][0-9][0-9])\s*(?:[.-]\s*)?)?(?:\(\s*([2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s*\)|([0-9][1-9]|[0-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s*(?:[.-]\s*)?)?([2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s*(?:[.-]\s*)?([0-9]{4})(?:\s*(?:#|x\.?|ext\.?|extension)\s*(\d+))?'
            ), text)

        if phone:
            number = ''.join(phone[0])
            if len(number) > 10:
                return '+' + number
            else:
                return number

    num = extract_mobile_number(text)
    #print(num)
    import re

    def extract_email(email):
        email = re.findall("([^@|\s]+@[^@]+\.[^@|\s]+)", email)
        if email:
            try:
                return email[0].split()[0].strip(';')
            except IndexError:
                return None

    email = extract_email(text)
    #print(email)
    import pandas as pd
    import spacy
    #from spacy.en import English
    # load pre-trained model
    nlp = spacy.load('en_core_web_sm')
    #noun_chunk = nlp.noun_chunks
    #nlp=English()
    doc = nlp(text)

    def extract_skills(resume_text):
        nlp_text = nlp(resume_text)

        # removing stop words and implementing word tokenization
        tokens = [token.text for token in nlp_text if not token.is_stop]
        #print(tokens)
        # reading the csv file
        data = pd.read_csv("D:/resume_analysis/techskill.csv")

        # extract values
        skills = list(data.columns.values)

        skillset = []

        # check for one-grams (example: python)
        for token in tokens:
            if token.lower() in skills:
                skillset.append(token)

        # check for bi-grams and tri-grams (example: machine learning)
        for token in doc.noun_chunks:
            token = token.text.lower().strip()
            if token in skills:
                skillset.append(token)

        return [i.capitalize() for i in set([i.lower() for i in skillset])]

    text = text.lower()
    skill = extract_skills(text)
    print(skill)

    skill_len = len(skill)
    excel_file = 'D:/resume_analysis/jd.xls'
    jd = pd.read_excel(excel_file)
    skill1 = jd['Skills']
    row = jd.shape[0]
    res = []

    for i in range(row):
        count = 0
        sk = skill1[i].split(',')
        for j in skill:
            if (skill1[i].find(j) != -1):
                count = count + 1

        res.append(100 * count / len(skill))
    ind = res.index(max(res))
    print(jd['JobTitle'][ind])
    res1 = []
    for i in res:
        res1.append(i)
    res1 = sorted(res1)
    p1 = max(res)
    print(res1)
    # res1 is sorted ascending; the best score is already in p1, so take the
    # second- and third-best scores from the end of the sorted list.
    second = res1[len(res1) - 2]
    third = res1[len(res1) - 3]
    print(max(res), second, third)
    res[ind] = -res[ind]
    ind1 = res.index(second)
    res[ind1] = -res[ind1]
    ind2 = res.index(third)
    print(ind1)
    s1 = jd['Skills'][ind]
    s2 = jd['Skills'][ind1]
    s3 = jd['Skills'][ind2]
    rs1, rs2, rs3 = [], [], []

    for j in skill:
        if (j in s1):
            rs1.append(j)
    for j in skill:
        if (j in s2):
            rs2.append(j)
    for j in skill:
        if (j in s3):
            rs3.append(j)
    return render_template('car.html',
                           job1=jd['JobTitle'][ind],
                           skills1=rs1,
                           job2=jd['JobTitle'][ind1],
                           skills2=rs2,
                           job3=jd['JobTitle'][ind2],
                           skills3=rs3,
                           p1=round(p1, 3),
                           fnam=f,
                           p2=round(second, 3),
                           p3=round(third, 3))
Example #51
def read_file(path,name):
  df = pd.read_excel('%s/%s'%(path,name))
  return df
driver.find_element_by_name('password').send_keys('t4')
time.sleep(.2)
driver.find_element_by_name('password').send_keys('78')
time.sleep(.2)
driver.find_element_by_name('password').send_keys('@g')
time.sleep(.2)
driver.find_element_by_name('password').send_keys('ma')
time.sleep(.2)
driver.find_element_by_name('password').send_keys('il')
time.sleep(.2)
driver.find_element_by_name('password').send_keys('.c')
time.sleep(.2)
driver.find_element_by_name('password').send_keys('om')
driver.find_element_by_name('password').send_keys(Keys.RETURN)
time.sleep(60)
abc = pd.read_excel('C:\\Users\\acer\\Downloads\\cds\\mat.xls', header=None, index_col=False)
f = open('data.csv', 'a')
var=0
continueCheck = False
for item in abc.index:
    print(abc[0][item])
    website = str(abc[0][item])
    if website == '6annonce.com':
        continueCheck = True
        continue

    if continueCheck:
        driver.get('https://pro.similarweb.com/#/website/worldwide-overview/'+website+'/*/999/3m?webSource=Total')
        # try:
        if driver.title != 'Pardon Our Interruption':
            wait = WebDriverWait(driver, 40)
def createRUID_List(rowIdxList, headerStr):
    """
    Loops over a series containing row indices and returns a list of RUID strings.
    Inputs:
      rowIdxList - collection of row index values 
      headerStr - DataFrame header string value for column containing RUIDs
    Outputs:
      new list containing RUID strings
    """
    RUID_List = []
    for aRowIdx in rowIdxList:
        workingRUID=df[headerStr].iloc[aRowIdx]
        RUID_List.append(workingRUID)
    return RUID_List

df = pd.read_excel("abcd_rucdr_master_forPython.xlsx")
print ('Finished reading in input file.')

#blackList=['NDAR_INV']
#for pattern in blackList:
#    df['pGUID_Rutgers'] = df['pGUID_Rutgers'].replace(pattern, '')
    
#datasets
Unique_DAIC_Invs = df['InvCodeDAIC_OnlyTxt'].dropna()
Unique_Rutgers_Invs = df['InvCodeRUCDR_OnlyTxt'].dropna()
AllRutgersInvs = df['InvCodeMinusDOTxt'].dropna()
AllDAIC_Invs = df['InvCodeMinusROTxt'].dropna()



print ('About to start first match2collections.') 
Example #54
def main():
    df = pd.read_excel(INPUT_FILE)
    wb = op.Workbook()
    ws = wb.active
    ws.append(['Месяц'] + ['Параметр'] + [i for i in range(1, 32)])

    wells = list(dict.fromkeys(df.well))

    # Iterate over well
    for well in wells:
        print(well)
        ws.append([well])
        last_row = ws.max_row
        ws.merge_cells(start_row=last_row,
                       start_column=1,
                       end_row=last_row,
                       end_column=33)

        well_df = df[df.well == well]
        years = list(dict.fromkeys(well_df.date.dt.year))
        # Iterate over year
        for year in years:
            year_df = well_df[well_df.date.dt.year == year]
            months = list(dict.fromkeys(year_df.date.dt.month))
            for month in months:
                month_df = year_df[year_df.date.dt.month == month]

                # Blank rows
                q = [None for i in range(31)]
                dynamic = [None for i in range(31)]
                static = [None for i in range(31)]
                # Iterate over month data
                for _, row in month_df.iterrows():
                    day = row.date.day
                    q[day - 1] = round(row.rate, 1)
                    dynamic[day - 1] = round(row.dynamic, 1)
                    static[day - 1] = round(row.static, 1)

                # Write rows to sheet
                ws.append([f'{months_names[month - 1]} {year}'] +
                          ['Q, м3/сут'] + q)
                ws.append([None] + ['Нд, м'] + dynamic)
                ws.append([None] + ['Нст, м'] + static)
                last_row = ws.max_row
                ws.merge_cells(start_row=last_row - 2,
                               start_column=1,
                               end_row=last_row,
                               end_column=1)

    # Apply styles
    for row in ws.iter_rows():
        for cell in row:
            cell.style = style_basic

        if row[0].value is not None and row[1].value is None:
            row[0].style = style_bold

    for i in range(2, 33):
        ws.column_dimensions[get_column_letter(i + 1)].width = 6

    wb.save(OUTPUT_FILE)
Example #55
def on_button_clicked(b):

    clear_output()
    display(button)
## UPLOADED INITIAL DATA
#     if mv.value !='':
#         try:

    data = pd.read_excel('story_'+ story.value+'/story'+ story.value+'.xlsx', sheet_name='sample')
    # data=data.drop(['FC_D','FC_E','FC_F'],axis=1)
    data['Departure_Date']=pd.to_datetime(data.Departure_Date)
    datadone=data[data.Departure_Date< pd.Timestamp(nowaday.value)]
    data=data[data.Departure_Date>= pd.Timestamp(nowaday.value)]
    ## 1. Sort by Arrival Date and Priority
    data=data.sort_values(by=['Arrival_Date','Price'], ascending=[True,False])

    ## 2. Set parameters and constraints
    # Total number of floating cranes
    a=1
    b=1
    c=1
    totfc = int(fcnumber.value)
    fclist=['FC_A','FC_B','FC_C']

    #### Create feature demanddays for 1 floating crane
    data['demanddays']= np.round(data.Demand_Qty/data.Loading_Rate)
    data['demandfc']=np.ceil(data['demanddays']/data.Laytime_Duration)
    data['demanddays_new']=np.ceil(data.Demand_Qty/(data.Loading_Rate*data['demandfc']))
    ## 3. Assign Floating Crane - Initial Plan
    # to get initial plan

    ### create initial first row
    import itertools
    a=[]
    for L in range(1, len(fclist)+1):
        for subset in itertools.combinations(fclist, L):
    #         print(subset)
            x=list(subset)
            a.append(x)
    a=[[1,0,0],
     [0,1,0],
     [0,0,1],
     [1,1,0],
     [1,0,1],
     [0,1,1],
    ]
    a=pd.DataFrame(a,columns=['FC_A', 'FC_B', 'FC_C'])

    if data.loc[0, 'demandfc'] == 1:
        data.loc[0, 'FC_A'] = 1
        data.loc[0, 'FC_B'] = 0
        data.loc[0, 'FC_C'] = 0
    elif data.loc[0, 'demandfc'] == 2:
        data.loc[0, 'FC_A'] = 1
        data.loc[0, 'FC_B'] = 1
        data.loc[0, 'FC_C'] = 0
    else:
        data.loc[0, 'FC_A'] = 1
        data.loc[0, 'FC_B'] = 1
        data.loc[0, 'FC_C'] = 1

    ### complete initial plan
    for i in range(1,data.shape[0]):

        if (data.loc[i,'demandfc'] == 1):
            data.loc[i, 'FC_A'] = 1
            data.loc[i, 'FC_B'] = 0
            data.loc[i, 'FC_C'] = 0
        elif (data.loc[i,'demandfc'] == 2) :

            for fc in range(a.shape[0]):
                if ((data.loc[i-1,'FC_A'])== (a.loc[fc,'FC_A'])) & ((data.loc[i-1,'FC_B'])== (a.loc[fc,'FC_B'])) & ((data.loc[i-1,'FC_C'])== (a.loc[fc,'FC_C'])): 
                    data.loc[i, 'FC_A'] = np.abs((a.loc[fc,'FC_A'])-1)
                    data.loc[i, 'FC_B'] = np.abs((a.loc[fc,'FC_B'])-1)
                    data.loc[i, 'FC_C'] = np.abs((a.loc[fc,'FC_C'])-1)
                    if ((data.loc[i, 'FC_A'] + data.loc[i, 'FC_B'] +data.loc[i, 'FC_C'] )==1) & (data.loc[i, 'FC_A']==0):
                        data.loc[i, 'FC_A']=1
                    elif ((data.loc[i, 'FC_A'] + data.loc[i, 'FC_B'] +data.loc[i, 'FC_C'] )==1) & (data.loc[i, 'FC_B']==0):
                        data.loc[i, 'FC_B']=1
                    elif ((data.loc[i, 'FC_A'] + data.loc[i, 'FC_B'] +data.loc[i, 'FC_C'] )==1) & (data.loc[i, 'FC_C']==0):
                        data.loc[i, 'FC_C']=1
                    else:continue
                else:continue
        else:
            data.loc[i, 'FC_A'] = 1
            data.loc[i, 'FC_B'] = 1
            data.loc[i, 'FC_C'] = 1  
    ## 4. Recalculate Departure Date
    #     based on real demanddays_new

    data['Arrival_Date_change']=pd.to_datetime(np.nan)
    data['Departure_Date_change']=pd.to_datetime(np.nan)
    data['FC_gap_Date_change'] = pd.to_datetime(np.nan)


    data=data[['MV', 'ETA','Arrival_Date', 'Laytime_Duration', 'Departure_Date',
       'Demand_Qty', 'Loading_Rate', 'Price', 'Demurrage_Rate', 'demanddays',
       'demandfc', 'demanddays_new',
               'FC_A', 'FC_B', 'FC_C',
                   'FC_D','FC_E','FC_F',
       'Arrival_Date_change', 'Departure_Date_change']]
    datachange=pd.DataFrame([[mv.value,arvl.value]],columns=['MV','Arrival_Date_change_source'])
    datachange['Arrival_Date_change_source']=pd.to_datetime(datachange.Arrival_Date_change_source)
    data=pd.merge(data,datachange,how='left',on=['MV'])
    data['Arrival_Date']=pd.to_datetime(data.Arrival_Date)
    data['Departure_Date']=pd.to_datetime(data.Departure_Date)
    # data['Arrival_Date_change']=pd.to_datetime(data.Arrival_Date_change)
    data['Arrival_Date_change']=data['Arrival_Date_change_source']
    data['Est_Departure_Date_change']=data['Arrival_Date_change']+pd.to_timedelta(data['demanddays_new'], unit='D')
    data['Departure_Date_change']=data['Arrival_Date_change']+pd.to_timedelta(10, unit='D')

    # data['Departure_Date_change']=pd.to_datetime(data.Departure_Date_change)
    data.loc[data.Arrival_Date_change.isnull() ,'Arrival_Date_change']=data.loc[data.Arrival_Date_change.isnull()  ,'Arrival_Date']
    data['Est_Departure_Date_change']=data['Arrival_Date_change']+pd.to_timedelta(data['demanddays_new'], unit='D')# data.loc[0, 'Arrival_Date_change']=pd.to_datetime(data.loc[0,'Arrival_Date_change_source'])
    # data.loc[0,'Departure_Date_change']=data.loc[0,'Arrival_Date_change']+pd.to_timedelta(data.loc[0,'demanddays_new'], unit='D')
    data['Departure_Date_change']=data['Arrival_Date_change']+pd.to_timedelta(10, unit='D')

    data.drop('Arrival_Date_change_source',axis=1,inplace=True)
    x=datachange['MV'][0]

    ### 6. Check the next schedule in the sequence
    #     If the changed departure date clashes with the next vessel, FC_Start_Date_change
    #     must be adjusted and the potential demurrage cost checked

    ## Sort by Arrival Date Change and Priority (Price)
    data=data.sort_values(by=['Arrival_Date_change','Price'], ascending=[True,False])
    data=data.reset_index()
    data.drop('index',axis=1,inplace=True)
    data['FC_Start_Date_change']=data['Arrival_Date_change']
    data['FC_End_Date_change']=data['Est_Departure_Date_change']

#             data.loc[data.MV== x ,'FC_Start_Date_change']=data.loc[data.MV== x ,'Arrival_Date_change']
#             data.loc[data.MV== x ,'FC_End_Date_change']=data.loc[data.MV== x ,'Est_Departure_Date_change']
#             data.loc[(data.MV!= x) & (data.FC_Start_Date_change.isnull()),'FC_Start_Date_change']=data.loc[data.MV!= x ,'Arrival_Date_change']
#             data.loc[(data.MV!= x) & (data.FC_End_Date_change.isnull()),'FC_End_Date_change']=data.loc[data.MV!= x ,'Est_Departure_Date_change']


    # Calculate Demurage cost
    data.loc[0,'Demmurage_Day']=0
    data.loc[0,'Demmurage_Cost']=0

    ### Create Demmuragecost Simulation Function

    def sim_demuragecost(totfc,data):
        totfc=totfc
        for i in range(1,data.shape[0]):
            # if the previous vessel's estimated departure falls on or after this vessel's arrival
            if (data.loc[i-1,'Est_Departure_Date_change'] >= data.loc[i,'Arrival_Date_change']) :
                totfc=totfc-data.loc[i-1,'demanddays_new']
                #if available fc >= demand fc i
                if (totfc >= data.loc[i,'demanddays_new'] ):
                    data.loc[i,'FC_Start_Date_change'] = data.loc[i,'Arrival_Date_change']
                    data.loc[i,'FC_End_Date_change'] = data.loc[i,'Est_Departure_Date_change']
                    # Calculate Demurage cost
                    data.loc[i,'Demmurage_Day']=np.ceil((data.loc[i,'FC_End_Date_change'] - data.loc[i,'Departure_Date_change'])/np.timedelta64(1,'D'))
                    data.loc[i,'Demmurage_Cost']=data.loc[i,'Demurrage_Rate'] * data.loc[i,'Demmurage_Day']
#                     data.loc[i,'FC_gap_Date_change'] = data.loc[i,'Departure_Date_change'] + pd.to_timedelta(1, unit='D') 

#                             data.loc[i,'Demmurage_Day']=0 #will be no risk to get demurage day n cost
#                             data.loc[i,'Demmurage_Cost']=0
                #if available FC is less than the demanded FC for vessel i, but at least one FC is still free
                elif (totfc < data.loc[i,'demanddays_new']) & (totfc >0):
                    #let the available FC start operating on the arrival date
                    data.loc[i,'FC_Start_Date_change'] = data.loc[i,'Arrival_Date_change']
                    data.loc[i,'FC_Start_Date_change_2'] = data.loc[i-1,'FC_End_Date_change'] + pd.to_timedelta(1, unit='D') #startdate next fc
                    #cal the number of days that available FC can start
                    data.loc[i,'dayrun_progress']=np.ceil((data.loc[i,'FC_Start_Date_change_2'] - data.loc[i,'FC_Start_Date_change'])/np.timedelta64(1,'D'))
                    #cal the remaining quantity that is already loaded by available FC
                    data.loc[i,'Demand_Qty_remain']= data.loc[i,'Demand_Qty'] - (data.loc[i,'Loading_Rate']*totfc*data.loc[i,'dayrun_progress'])
                    #cal the remaining number of FC to fulfill the demand
                    data.loc[i,'demandfc_remain'] = data.loc[i,'demanddays_new'] - totfc
                    #re-cal the total demandays based on this condition
                    data.loc[i,'demanddays_new']= np.ceil(data.loc[i,'Demand_Qty_remain'] / (data.loc[i,'Loading_Rate']*data.loc[i,'demanddays_new'])) +data.loc[i,'dayrun_progress']
                    #cal the end date fc operate
                    data.loc[i,'FC_End_Date_change'] = data.loc[i,'FC_Start_Date_change'] + pd.to_timedelta(data.loc[i,'demanddays_new'], unit='D')
                    # Calculate Demurage cost
                    data.loc[i,'Demmurage_Day']=np.ceil((data.loc[i,'FC_End_Date_change'] - data.loc[i,'Departure_Date_change'])/np.timedelta64(1,'D'))
                    data.loc[i,'Demmurage_Cost']=data.loc[i,'Demurrage_Rate'] * data.loc[i,'Demmurage_Day']
#                     data.loc[i,'FC_gap_Date_change'] = data.loc[i,'Departure_Date_change'] + pd.to_timedelta(1, unit='D') 

                #if no FC is available at all
                else:
                    #the FC can only start once the previous MV has finished loading
                    data.loc[i,'FC_Start_Date_change'] = data.loc[i-1,'FC_End_Date_change'] + pd.to_timedelta(1, unit='D') 
                    data.loc[i,'FC_End_Date_change'] = data.loc[i,'FC_Start_Date_change'] + pd.to_timedelta(data.loc[i,'demanddays_new'], unit='D')
                    # Calculate Demurage cost
                    data.loc[i,'Demmurage_Day']=np.ceil((data.loc[i,'FC_End_Date_change'] - data.loc[i,'Departure_Date_change'])/np.timedelta64(1,'D'))
                    data.loc[i,'Demmurage_Cost']=data.loc[i,'Demurrage_Rate'] * data.loc[i,'Demmurage_Day']
#                     data.loc[i,'FC_gap_Date_change'] = data.loc[i,'Departure_Date_change'] + pd.to_timedelta(1, unit='D') 
                totfc=3 #reset to initial total fc
            else:
                totfc = 3
                data.loc[i,'FC_Start_Date_change'] = data.loc[i,'Arrival_Date_change']
                data.loc[i,'FC_End_Date_change'] = data.loc[i,'Est_Departure_Date_change']
                data.loc[i,'Demmurage_Day']=np.ceil((data.loc[i,'FC_End_Date_change'] - data.loc[i,'Departure_Date_change'])/np.timedelta64(1,'D'))
                data.loc[i,'Demmurage_Cost']=data.loc[i,'Demurrage_Rate'] * data.loc[i,'Demmurage_Day']
#                 data.loc[i,'FC_gap_Date_change'] = data.loc[i,'Departure_Date_change'] + pd.to_timedelta(1, unit='D') 

        data.loc[data.Demmurage_Day<=0 ,'Demmurage_Day']=0
        data.loc[data.Demmurage_Cost<=0 ,'Demmurage_Cost']=0
#         data.loc[data.Demmurage_Cost<=0 ,'FC_gap_Date_change']=data.loc[data.Demmurage_Cost<=0 ,'FC_End_Date_change']
        return data
    ### Call function
    data=sim_demuragecost(totfc,data)
    data

    def gantt_fig3(data):
        data3 = []
        for row in data.itertuples():
            data3.append(dict(Task=str(row.MV), Start=str(row.Arrival_Date_change),
                          Finish=str(row.Departure_Date_change), Resource='Plan'))
            data3.append(dict(Task=str(row.MV), Start=str(row.FC_Start_Date_change),
                          Finish=str(row.FC_End_Date_change), Resource='Actual'))


        fig = ff.create_gantt(data3, index_col='Resource', title='Gantt Chart', show_colorbar = True, group_tasks = True , height=500, width=1300 )
        fig['layout'].update(legend=dict(traceorder='reversed'))
        return fig

    iplot(gantt_fig3(data))
    data = pd.concat([datadone, data])
    newtable=data
    posttable=data
    newtable.columns
    newtable['Arrival_Date']=newtable.Arrival_Date_change
    newtable['Departure_Date']=newtable.Departure_Date_change
    tab=newtable[['MV', 'ETA', 'Arrival_Date', 'Laytime_Duration', 'Departure_Date',
       'Demand_Qty', 'Loading_Rate', 'Price',
                  'FC_A', 'FC_B', 'FC_C', 'FC_D','FC_E', 'FC_F', 
                  'Demmurage_Day', 'Demurrage_Rate', 'Demmurage_Cost']]
    tab.to_excel('story_'+ story.value+'/story'+ story.value+'.xlsx',sheet_name='sample',engine='xlsxwriter',index=False)

    data.drop(['demanddays'],axis=1,inplace=True)
    data.rename(columns={'demanddays_new':'demanddays'},inplace=True)
    print('Total demurrage cost: USD ' + str(data.Demmurage_Cost.sum()))
    button
    data.dropna(axis=0, how='all', thresh=None, subset=None, inplace=True)
    data=data.drop(['FC_A', 'FC_B', 'FC_C', 'FC_D','FC_E', 'FC_F'],axis=1)
    data

    return button, display(data), data
Example #56
for data_file in data_files:
    if ".zip" in data_file:
        data_file_name = data_file[:-4]

        if extract_zip_files:
            with ZipFile("%s/%s" % (DATA_DIR, data_file), 'r') as zipObj:
                listOfFileNames = zipObj.namelist()
                fileName = listOfFileNames[0]
                zipObj.extractall("/tmp")
                os.replace("/tmp/%s" % fileName,
                           "/tmp/%s.xls" % data_file_name)

        xl = pd.ExcelFile("/tmp/%s.xls" % data_file_name)
        sheet_name = xl.sheet_names[0]
        df = pd.read_excel(xl,
                           sheet_name,
                           usecols=[NAME, DATE, INTEREST, LONG, SHORT])

        name_list += list(df[NAME])
        date_list += list(df[DATE])
        interest_list += list(df[INTEREST])
        long_list += list(df[LONG])
        short_list += list(df[SHORT])

num_of_entries = len(name_list)

z_scores_one_year = []
z_scores_three_year = []

cwd = os.getcwd()
Example #57
import requests
import csv
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

'''
Åldersgrupp       : Age group
Antal vaccinerade : Number of vaccinated
Andel vaccinerade : Proportion of vaccinated
Dosnummer         : Dose number
'''

xls = pd.ExcelFile("https://fohm.maps.arcgis.com/sharing/rest/content/items/fc749115877443d29c2a49ea9eca77e9/data")

xls1 = pd.read_excel(xls, 'Vaccinerade ålder')

#print(xls1.columns)

# Let's drop unnamed column from the dataframe
df = xls1.drop("Unnamed: 5", axis=1)
#print(df.columns)

# Now separate the dose 1 and dose 2 figures for the whole of Sweden
df_sweden_dose1 = df.loc[0:8]
print(df_sweden_dose1)

df_sweden_dose2 = df.loc[9:17]
#print(df_sweden_dose2)
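# The matplotlib/seaborn imports above are unused in this excerpt. A possible
# continuation, assuming the column names follow the glossary at the top of the file:
sns.barplot(x="Åldersgrupp", y="Andel vaccinerade", data=df_sweden_dose1)
plt.title("Proportion vaccinated (dose 1) by age group")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()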

Example #58
import pandas

df = pandas.read_excel('712693030RPKUP4RX.xlsx')
header = df.iloc[2]  # grab the header row
df1 = df[3:].copy()  # drop the first three rows
df1 = df1.rename(columns=header)  # reset the column headers
df2 = df1.drop(columns=['縣市代碼', '村里代碼', '村里名稱', '村里代碼'], axis=1)  # drop these four columns
df3 = df2.drop_duplicates()  # remove duplicate rows

df3.to_csv('district.csv', encoding='big5', index=False)
Example #59
import pickle
import pandas as pd
from ics_data_clean import clean_data, clean_text_round1, process_data
from ics_train_classifier import train_
from plotly.graph_objs import Bar, Scatter
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# pickle load important files
category_id_df = pd.read_pickle('../pickle/ics/factorize/category_id_df.pkl')
id_to_category = dict(category_id_df[['category_id', 'category']].values)

with open('../pickle/ics/stop_words.pickle', 'rb') as f:
    stop_words = pickle.load(f)
# training_database_table = pd.read_pickle('../pickle/ics/training_database_index.pkl')

# process list
process_list = pd.read_excel('../data/ics/processes.xlsx')

# load training data
df = pd.read_pickle('../pickle/ics/data_final.pkl')

# define vectorizers
tfidf = TfidfVectorizer(analyzer='word',
                        sublinear_tf=True,
                        norm='l2',
                        encoding='latin-1',
                        ngram_range=(1, 2),
                        stop_words=stop_words)
tfidf.fit(df.content)
cv = CountVectorizer(stop_words=stop_words)

tfidf_s = TfidfVectorizer(analyzer='word',
import pandas as pd
import numpy as np
import sys

# read the data from the Excel file
df=pd.read_excel('E:/PythonStudy_Git/调用资料/file/菜品报表 (1).xlsx',sheet_name = 0)
# add a date column
df['时间'] = '2020-10-02'

df2=pd.read_excel('E:/PythonStudy_Git/调用资料/file/菜品报表 (2).xlsx',sheet_name = 0)
print(df2)

'''Merge the two tables. how: join type:
inner  - rows are combined only when the keys match (e.g. the same id)
left   - left join
right  - right join
outer  - full outer join
The default is inner.'''
df3=pd.merge(df,df2,how="outer")
print(df3)
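# A small illustration (not in the original) of the 'how' options described above,
# using the same two frames: inner keeps only rows whose key columns match in both
# reports, left keeps every row of df, outer keeps everything.
df_inner = pd.merge(df, df2, how="inner")
df_left = pd.merge(df, df2, how="left")
print(len(df_inner), len(df_left), len(df3))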