Python DataFrame Examples, DataFrame Python Examples

Example #1

0

Show file

    def __init__(self, dados=None):
        """Create an empty frame and optionally load columns from a mapping.

        dados: optional mapping of column name -> list of values. Every entry
            is added through InserirColuna, with the key converted to str.
        """
        self.df      = DataFrame()  # backing DataFrame implemented in the .cpp extension
        self.colunas = {}      # column name -> type of the column
        self.shape   = [0, 0]  # [number of rows, number of columns]
        self.indices = []      # names of the indexed columns

        # Identity test: `!= None` would call the argument's own __ne__, which
        # containers with element-wise comparison do not answer with a bool.
        if dados is not None:
            for coluna in dados:
                self.InserirColuna(dados[coluna], str(coluna))

Example #2

0

Show file

 def Copy(self):
     """Show the currently chosen columns of self.df in the table view.

     A fresh DataFrameModel is attached to self.tableView and filled with the
     columns selected through self.columnChooseIndex.
     """
     # Call form of print: valid on Python 3 and, with one argument, prints
     # the same text on Python 2.
     print(self.currentChoose)
     self.dataModel2 = DataFrameModel()
     self.tableView.setModel(self.dataModel2)
     # Each chosen index is shifted by 6 positions into self.df.columns.
     names2 = [
         self.df.columns[self.columnChooseIndex[tab] + 6]
         for tab in range(len(self.columnChooseIndex))
     ]
     self.dataModel2.setDataFrame(DataFrame(self.df[names2]))

Example #3

0

Show file

File: functions.py Project: Daheckwith/FYS4411

def parallelization(Dir, Par=10):
    """Plot the elapsed time reported for each thread count and save the figure.

    For every thread count in a fixed list, the matching file is located with
    data.get_filename and loaded with data.initiate_df; the time value returned
    by the loader is converted to float and plotted against the thread count.

    Dir: directory passed on to data.get_filename.
    Par: value passed on to data.get_filename as Par.
    """
    thread_counts = [1, 2, 4, 8, 12]
    elapsed = []

    for n_threads in thread_counts:
        path = data.get_filename(Dir, Thr=n_threads, Par=Par)
        # Only the time part of the (frame, time) pair is needed here.
        _, seconds = data.initiate_df(path)
        elapsed.append(float(seconds))

    plt.figure()
    plt.plot(thread_counts, elapsed, 'o')
    plt.grid()
    plt.xlabel(r'Number of Threads')
    plt.ylabel(r'$t\ [s]$')
    plt.legend(['Time elapsed'])
    save_fig("time_per_thread.png")
    plt.show()

Example #4

0

Show file

File: functions.py Project: Daheckwith/FYS4411

def analytical_vs_numerical(Dir,
                            Importance=False,
                            BB=False,
                            NumDeriv=False,
                            Thr=8,
                            Dim=3,
                            Par=10,
                            MC_Cycles=2097152):
    """Load the result file of an analytical or numerical run.

    The directory is extended with "<Dim>D/" and then with "Numerical/" when
    NumDeriv is true, "Analytical/" otherwise. The file name is resolved with
    data.get_filename and loaded with data.initiate_df.

    Returns the (df, time) pair produced by data.initiate_df.
    """
    variant = "Numerical/" if NumDeriv else "Analytical/"
    run_dir = Dir + f"{Dim}D/" + variant

    # NOTE(review): Thr is accepted by this function but is not forwarded to
    # data.get_filename -- confirm whether that is intended.
    filename = data.get_filename(run_dir,
                                 Importance=Importance,
                                 BB=BB,
                                 NumDeriv=NumDeriv,
                                 Dim=Dim,
                                 Par=Par,
                                 MC_Cycles=MC_Cycles)
    frame, elapsed = data.initiate_df(filename)
    return frame, elapsed

Example #5

0

Show file

File: Parsing.py Project: 404cn/compilers

def parsing():
    """Drive the parse stack against the token stream using the data_map table.

    Repeats until the top of the stack is '$':
      * top equals the current token -> pop it and read the next token;
      * top is 'ε'                   -> pop it;
      * top is a terminal that did not match -> report and stop;
      * otherwise the table entry data_map.loc[top][token] is looked up; an
        empty or unknown entry is reported and stops the parse, a known
        production is printed, popped and its body is pushed through
        reverse_production_body_to_stack.

    Progress and errors are printed; nothing is returned.
    """
    token = next_token()
    top = stack.peek()
    while top != '$':
        if top == token:
            print('匹配 ' + top)
            stack.pop()
            token = next_token()
        elif top == 'ε':
            stack.pop()
        elif DataFrame.is_vt(top):
            print('error, is_vt')
            print(stack)
            break
        else:
            # Look the table entry up once instead of once per comparison.
            entry = data_map.loc[top][token]
            if entry == '':
                print('error, 空')
                print(stack)
                break
            if entry not in productions:
                # Without this branch neither the stack nor the token changed,
                # so the loop never terminated.
                print('error, unknown production')
                print(stack)
                break
            print('输出 ' + entry)
            stack.pop()
            reverse_production_body_to_stack(entry)
        top = stack.peek()

Example #6

0

Show file

class DataFrameMc():
    """Python front end for the DataFrame implemented in the .cpp extension.

    Can be initialised from a dictionary such as
    {'coluna1':[1,10],'coluna2':['valor1','valor2']}.

    The backend exposes one method per column type (…Int, …Double, …String);
    this class records the type of every column and picks the matching one.
    """

    # Suffix of the backend method family for each recorded column type.
    _SUFIXOS = {'int': 'Int', 'double': 'Double', 'string': 'String'}

    def __init__(self, dados=None):
        """Create an empty frame and optionally load columns from a mapping.

        dados: optional mapping of column name -> list of values. Every entry
            is added through InserirColuna, with the key converted to str.
        """
        self.df      = DataFrame()  # backing DataFrame implemented in the .cpp extension
        self.colunas = {}      # column name -> 'int' | 'double' | 'string'
        self.shape   = [0, 0]  # [number of rows, number of columns]
        self.indices = []      # names of the indexed columns

        # Identity test: `!= None` would call the argument's own __ne__.
        if dados is not None:
            for coluna in dados:
                self.InserirColuna(dados[coluna], str(coluna))

    def _metodo(self, prefixo, nome_coluna):
        """Return the backend method `prefixo` + type suffix for a stored column.

        Raises KeyError when the column is not registered.
        """
        sufixo = self._SUFIXOS[self.colunas[nome_coluna]]
        return getattr(self.df, prefixo + sufixo)

    def InserirColuna(self, valores, nome_coluna):
        """Add a column, inferring its type from the values.

        All ints -> 'int'; ints and floats -> 'double'; anything else is
        stored as 'string' (values are converted with str when mixed).
        Values must be passed inside a list.

        Raises Exception when the frame already has columns and the new one
        has a different length.
        """
        if all(isinstance(x, int) for x in valores):
            tipo = 'int'
        elif all(isinstance(x, (float, int)) for x in valores):
            tipo = 'double'
        else:
            if not all(isinstance(x, str) for x in valores):
                valores = [str(i) for i in list(valores)]
            tipo = 'string'

        primeira_coluna = self.shape == [0, 0]
        if not primeira_coluna and self.shape[0] != len(valores):
            raise Exception("Coluna com tamanhos diferentes.")

        getattr(self.df, 'InserirColuna' + self._SUFIXOS[tipo])(valores, nome_coluna)
        self.colunas[nome_coluna] = tipo
        if primeira_coluna:
            # The first column fixes the row count of the frame.
            self.shape[0] = len(valores)
        self.shape[1] += 1

    def GetColuna(self, nome_coluna):
        """Return the values of one column as provided by the backend."""
        return self._metodo('GetColuna', nome_coluna)(nome_coluna)

    def RemoverColuna(self, nome_coluna):
        """Remove a column from the frame, dropping its index if it has one."""
        self._metodo('RemoverColuna', nome_coluna)([], nome_coluna)
        # RemoverIndice looks the column type up, so it has to run before the
        # column is forgotten (and it is a method: it needs `self.`).
        if nome_coluna in self.indices:
            self.RemoverIndice(nome_coluna)
        self.colunas.pop(nome_coluna)
        self.shape[1] -= 1

    def RemoverLinha(self, linhas):
        """Remove the given rows and rebuild every existing index."""
        self.df.RemoverLinha(linhas)
        self.shape[0] -= len(linhas)
        # Iterate over a copy: IndexarColuna rebinds self.indices.
        for nome_coluna in list(self.indices):
            self.IndexarColuna(nome_coluna)

    def GetLoc(self, linha, nome_coluna):
        """Return the values of ONE column at the given row or rows.

        linha: a single int or an iterable of ints.
        Returns [] when no rows are requested.
        Raises Exception when a row number exceeds the last row.
        """
        linhas = [linha] if isinstance(linha, int) else list(linha)

        if linhas and max(linhas) > self.shape[0] - 1:
            raise Exception("Linha fornecida excedeu o DataFrame")

        metodo = self._metodo('GetLinha', nome_coluna)
        if not linhas:
            # max() of an empty list used to raise ValueError here.
            return []
        return metodo(linhas, nome_coluna)

    def GetLinha(self, linha):
        """Return {column name: values} for the given list of rows."""
        if len(linha) == 0:
            return {nome_coluna: [] for nome_coluna in self.colunas}
        return {nome_coluna: self.GetLoc(linha, nome_coluna)
                for nome_coluna in self.colunas}

    def GetDados(self):
        """Return the whole frame as a dictionary."""
        return self.GetLinha(range(0, self.shape[0]))

    def Slice(self, linhas, nome_colunas):
        """Return the data at the given rows and columns.

        An empty `linhas` selects every row. With exactly one column name the
        values are returned directly; otherwise a {column: values} dictionary
        is returned.
        """
        if len(linhas) == 0:
            linhas = list(range(0, self.shape[0]))
        if len(nome_colunas) == 1:
            # Was `nome_coluna2`, an undefined name.
            return self.GetLoc(linhas, nome_colunas[0])
        return {nome_coluna: self.GetLoc(linhas, nome_coluna)
                for nome_coluna in nome_colunas}

    def Show(self, dados):
        """Print a dictionary in tabular form."""
        print(tabulate(dados, tablefmt='simple', headers='keys'))

    def InserirLinha(self, valores):
        """Append rows given as {column name: list of new values}.

        Raises Exception when the lists differ in length and KeyError when a
        column of the frame is missing from `valores`. Both checks run before
        the backend is touched, so a failure leaves the frame unchanged.
        """
        tamanhos = {len(valores[nome]) for nome in valores}
        if len(tamanhos) > 1:
            raise Exception("Colunas com tamanhos de linhas diferentes")
        for nome_coluna in self.colunas:
            if nome_coluna not in valores:
                raise KeyError(nome_coluna)

        num_linhas = tamanhos.pop() if tamanhos else 0
        for nome_coluna in self.colunas:
            self._metodo('InserirLinha', nome_coluna)(valores[nome_coluna], nome_coluna)
        self.shape[0] += num_linhas

    def IndexarColuna(self, nome_coluna):
        """Index a column and record it in self.indices (sorted, no duplicates)."""
        self._metodo('IndexarColuna', nome_coluna)([], nome_coluna)
        self.indices = sorted(set(self.indices + [nome_coluna]))

    def RemoverIndice(self, nome_coluna):
        """Remove the index of a column.

        Raises ValueError when the column is not recorded as indexed.
        """
        self._metodo('RemoverIndice', nome_coluna)([], nome_coluna)
        self.indices.remove(nome_coluna)

    def Query_Tree(self, nome_coluna, operador, valor):
        """Helper for Query: run the query through the backend's QueryTree* method.

        Users should call Query instead.
        """
        return self._metodo('QueryTree', nome_coluna)([valor], nome_coluna, operador)

    def Query_Simples(self, nome_coluna, operador, valor):
        """Helper for Query: run the query through the backend's QuerySimple* method.

        Users should call Query instead.
        """
        return self._metodo('QuerySimple', nome_coluna)([valor], nome_coluna, operador)

    def Query(self, nome_coluna, operador, valor):
        """Query the frame. Operators are '==', '<', '<=', '>', '>='.

        Indexed columns go through Query_Tree, the others through Query_Simples.
        """
        if nome_coluna in self.indices:
            return self.Query_Tree(nome_coluna, operador, valor)
        return self.Query_Simples(nome_coluna, operador, valor)

    def QueryRect(self, queryrect, nome_coordenada1, nome_coordenada2):
        """Run a rectangle query over two indexed 'double' columns.

        queryrect = [xmin, ymin, xmax, ymax]

        Raises Exception when a column is not 'double', when either column is
        not indexed, or when queryrect is not four numeric values.
        """
        if self.colunas[nome_coordenada1] != 'double' or self.colunas[nome_coordenada2] != 'double':
            raise Exception('Colunas com coordenadas devem ser de doubles')
        if nome_coordenada1 not in self.indices or nome_coordenada2 not in self.indices:
            raise Exception("Colunas de coordenadas precisam estar indexadas")
        if len(queryrect) != 4 or not all(isinstance(x, (float, int)) for x in queryrect):
            raise Exception('queryrect deve ser uma lista com 4 valores numericos')
        return self.df.QueryRect(queryrect, nome_coordenada1, nome_coordenada2)

    # Data visualisation (plots)

    def Plot(self, nome_coluna1, nome_coluna2, *args, **kwargs):
        """Scatter/line plot of two columns; extra arguments go to plt.plot."""
        x = self.GetColuna(nome_coluna1)
        y = self.GetColuna(nome_coluna2)
        plt.plot(x, y, *args, **kwargs)

    def Hist(self, nome_coluna1, *args, **kwargs):
        """Histogram of one column; extra arguments go to plt.hist."""
        x = self.GetColuna(nome_coluna1)
        plt.hist(x, *args, **kwargs)

    def Hist2D(self, nome_coluna1, nome_coluna2, *args, **kwargs):
        """2-D histogram of two columns; extra arguments go to plt.hist2d."""
        x = self.GetColuna(nome_coluna1)
        y = self.GetColuna(nome_coluna2)
        plt.hist2d(x, y, *args, **kwargs)

    def BarPlot(self, coluna_altura, coluna_posicao, tipo='soma', *args, **kwargs):
        """Bar plot of `coluna_altura` grouped by the values of `coluna_posicao`.

        tipo selects the bar height: 'soma' (sum), 'media' (mean) or
        'contador' (count). Any other value draws nothing.
        Extra arguments go to plt.bar.
        """
        altura = self.GetColuna(coluna_altura)
        posicao = self.GetColuna(coluna_posicao)
        categorias = sorted(set(posicao))
        soma = {k: 0 for k in categorias}
        contador = {k: 0 for k in categorias}
        # One pass over the rows instead of one pass per category.
        for cat, valor in zip(posicao, altura):
            contador[cat] += 1
            soma[cat] += valor
        media = {k: soma[k] / contador[k] for k in categorias}

        por_tipo = {'soma': soma, 'media': media, 'contador': contador}
        if tipo in por_tipo:
            escolhido = por_tipo[tipo]
            plt.bar(list(escolhido.keys()), list(escolhido.values()), *args, **kwargs)

Example #7

0

Show file

29/10: all_values = test_data_list[0].split(',')
29/11: print(all_values[0])
29/12: n.query((numpy.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01)
29/13: image_array = numpy.asfarray(all_values[1:]).reshape((28,28))
29/14: matplotlib.pyplot.imshow(image_array,cmap='Greys',interpolation='None')
30/1: import numpy as np
30/2: normal_list = range(1000)
30/3: %timeit [i**2 for i in normal_list]
31/1: path = './usagov_bitly_data2013-05-17-1368828605.txt'
31/2: import json
31/3: records = [json.loads(line) for line in open(path)]
31/4: records
31/5: from pandas import DataFrame, Series
31/6: import pandas as pd
31/7: import numpy as np
31/8: frame = DataFrame(records)
31/9: frame
31/10: frame['tz']
31/11: frame['tz'][:10]
31/12: import pylab
31/13: import _tkinter
31/14: import tkinter
31/15: import Tkinter
31/16: :version
31/17: help
31/18: help()
31/19: tz_counts = frame['tz'].value_counts()
31/20: tz_counts
31/21: clean_tz = frame['tz
31/22: clean_tz = frame['tz'].fillna('Missing')
31/23: clean_tz[clean_tz == ''] = 'Unknown'

Example #8

0

Show file

File: main.py Project: Daheckwith/FYS4411

# Step length (BF) and time step (IMP) are both set to 0.1
# NOTE(review): "Parameters/MC-Cycles/" is appended to Dir here and again
# further down in this excerpt -- confirm only one of the two sections is
# meant to be active at a time.
Dir += "Parameters/MC-Cycles/"
Par = 100; MC_Cycles = 1000 # Just an arbitrary number for this case only
file_BF             = data.get_filename(Dir, Importance= False, Thr= 1, Dim= 3, Par= Par, MC_Cycles= MC_Cycles)
file_IMP            = data.get_filename(Dir, Importance= True, Thr= 1, Dim= 3, Par= Par, MC_Cycles= MC_Cycles)
df_BF, time_BF      = data.initiate_df(file_BF)
df_IMP, time_IMP    = data.initiate_df(file_IMP)



plt.close("all")
f.parameter_analysis_(df_BF, MC_list)
f.parameter_analysis_(df_IMP, MC_list)
# Parameters Analysis
"""
"""
# Parameters Analysis
# Testing how the number of MC-cycles affects the results for the bootstrapping and blocking variance
# For both the brute-force (BF) method and the importance (IMP) method respectively
Dir += "Parameters/MC-Cycles/"
Par = 100; MC_Cycles = 1000 # Just an arbitrary number for this case only

# Analytical case (the brute-force lines are currently commented out)
# file_BF             = data.get_filename(Dir, Importance= False, BB= True, Thr= 8, Dim= 3, Par= Par, MC_Cycles= MC_Cycles)
file_IMP            = data.get_filename(Dir, Importance= True, BB= True, Thr= 8, Dim= 3, Par= Par, MC_Cycles= MC_Cycles)
# df_BF, time_BF      = data.initiate_df(file_BF)
df_IMP, time_IMP    = data.initiate_df(file_IMP)

# data.df_sequential_alpha_sort(df_BF, MC_list)
data.df_sequential_alpha_sort(df_IMP, MC_list)

Example #9

0

Show file

File: Main.py Project: ceguiluzrosas/MyHaven-Smart-Plug

def main():
    """Fetch the last 30 days of usage and report daily and monthly quota status.

    Returns:
        * the second element of the login result when login fails;
        * the JSON string '{"All Good": false}' when the data request fails;
        * otherwise a JSON string with "All Good": true and the month/day
          update and message.
    Progress messages are printed along the way.
    """
    # Dates used for the API call
    today = datetime.datetime.now()
    month_before = today + datetime.timedelta(days=-30)

    # Quota/threshold in $ and KWH.
    # Note: quota/money is a float, bill is the formatted $ amount (str).
    monthly_quota = 2.0
    monthly_quota_bill = locale.currency(monthly_quota, grouping=True)
    monthly_quota_kwh = cal.calculate_kwh_from_bill(monthly_quota)

    day_quota = monthly_quota/30
    day_quota_bill = locale.currency(day_quota, grouping=True)

    api = api_call.API()
    logged_in = api.login()
    print ("Logged into API")

    if not logged_in[0]:
        # Unsuccessful login
        return logged_in[1]

    # Successful login
    print(logged_in[1])

    # Collect data to populate the data frame
    collecting_data = api.get_data(cal.api_time(str(month_before)), cal.api_time(str(today)))
    print ("Collected Data")

    if not collecting_data[0]:
        # Unable to request data
        return json.dumps({"All Good": False})

    # 1. Clean data
    [avg_power, cleaned_data] = cal.clean_month_data(collecting_data[1], monthly_quota, monthly_quota_kwh)
    avg_power = avg_power / 1000.0
    quota_hours = monthly_quota_kwh / avg_power
    cleaned_data["Remaining Hours"] = [(quota_hours - (kwh / avg_power)) for kwh in
                                       cleaned_data["Current KWH Total"]]
    print ("Cleaned Data")

    # 2. Create DataFrame with the new data
    data_frame = df.DF(cleaned_data)
    print ("Created DataFrame")

    # 3. Get the latest update
    latest_data = data_frame.get_latest(1)
    current_kwh_total = float(latest_data['Current KWH Total'])
    current_day_kwh = float(latest_data['Energy'])
    print ("Got Latest Update")

    # 4. Check whether the day and month quotas were met
    [current_day_money, current_day_bill] = cal.calculate_bill(current_day_kwh)
    day_update = um.create_update(latest_data, day_quota, day_quota_bill,
                                  current_day_money, current_day_bill, 'daily')
    day_message = um.generate_message(str(today), current_day_money, current_day_bill,
                                      day_quota, "daily")

    [current_month_money, current_month_bill] = cal.calculate_bill(current_kwh_total)
    # The monthly update takes the monthly bill; it used to receive
    # current_day_bill, copied from the daily call above.
    month_update = um.create_update(latest_data, monthly_quota, monthly_quota_bill,
                                    current_month_money, current_month_bill, 'monthly')
    month_message = um.generate_message(str(today), current_month_money, current_month_bill,
                                        monthly_quota, "monthly")
    print ("Checked Quotas")

    print (data_frame.to_string())

    output = {"All Good": True,
              "Status_Report_Month": {
                  "Month_Update": month_update,
                  "Month_Message": month_message
                },
              "Status_Report_Day": {
                  "Day_Update": day_update,
                  "Day_Message": day_message
                }
              }
    print ("Output Created")
    print (output["Status_Report_Month"])
    print (output["Status_Report_Day"])
    return json.dumps(output)

Example #10

0

Show file

File: stock-prediction.py Project: bob123kk/NN-Asset-Allocation

    # Shuffle training data
    shuffle_indices = np.random.permutation(np.arange(len(y_train)))
    X_train = X_train[shuffle_indices]
    y_train = y_train[shuffle_indices]

    # Minibatch training
    for i in range(0, len(y_train) // batch_size):
        start = i * batch_size
        batch_x = X_train[start:start + batch_size]
        batch_y = y_train[start:start + batch_size]
        # Run optimizer with batch
        net.run(opt, feed_dict={X: batch_x, Y: batch_y})

        # Show progress
        if np.mod(i, 10) == 0:
            # MSE train and test
            mse_train.append(net.run(mse, feed_dict={X: X_train, Y: y_train}))
            mse_test.append(net.run(mse, feed_dict={X: X_test, Y: y_test}))
            print('MSE Train: ', mse_train[-1])
            print('MSE Test: ', mse_test[-1])
            # Prediction
            pred = net.run(out, feed_dict={X: X_test})
            line2.set_ydata(pred)
            plt.title('Epoch ' + str(e) + ', Batch ' + str(i))
            plt.pause(0.01)

# Wrap the prediction held in `pred` in a DataFrame and write it out as CSV.
# NOTE(review): the output path is absolute and user-specific -- confirm it
# should not be configurable.
from pandas import DataFrame
pred = DataFrame(pred)
pred.to_csv('/Users/zozozoe/Desktop/daxpart4_pred.csv')

Example #11

0

Show file

File: program flow.py Project: teddymcw/Python-SPSS-AlamedaCounty

conn = engine.connect()

 * # Required for querying tables
metadata = MetaData(conn)

 * # Table to query
tbl = Table(TableName, metadata, autoload=True, schema="dbo")
#tbl.create(checkfirst=True)

 * # Select all
sql = tbl.select()

 * # run sql code
result = conn.execute(sql)

 * # Insert to a dataframe
df = DataFrame(data=list(result), columns=result.keys())

 * # Close connection
conn.close()
ru=DataFrame(df.REFERRAL_CODE)
ru.head()
ruList=[]
for x in ru.REFERRAL_CODE.unique():
   ruList.append(x)
end program.

 * begin program.
 * for item in ruList:
 print item
end program.

Example #12

0

Show file

File: Parsing.py Project: 404cn/compilers

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import DataFrame
from Stack import Stack

# Module-level parser state, loaded once through the project's DataFrame module.
productions = DataFrame.productions()   # collection of known productions
data_map = DataFrame.data_frame()       # table looked up as data_map.loc[top][token]
vns = DataFrame.vns_from_file()         # presumably the non-terminal symbols -- TODO confirm
vts = DataFrame.vts_from_file()         # presumably the terminal symbols -- TODO confirm
stack = Stack()                         # parse stack
tokens = []                             # filled by init_tokens()


def init_tokens():
    """Fill the global `tokens` list from the lexer output in ./result.txt.

    Every line is split on ','; the second field, with its first and last
    character stripped, is the token. An empty result stands for the comma
    token itself. The piece after the final newline is ignored, and '$' is
    appended as end marker.
    """
    global tokens
    with open("./result.txt", 'r') as f:
        rows = f.read().split('\n')

    # The last piece (text after the final newline) is never processed.
    for row in rows[:-1]:
        lexeme = row.split(',')[1][1:-1]
        tokens.append(lexeme if lexeme != '' else ',')
    tokens.append('$')


def next_token():
    """读取下一个token
    """

Example #13

0

Show file

# 
# <codecell>


#!python
import DataFrame

# <markdowncell>

# and read the file in using our desired CSV dialect:
# 
# <codecell>


#!python
df=DataFrame.read_csv ("CSVSample.csv",dialect=DataFrame.access2000)

# <markdowncell>

# (note that the dialect is actually defined in the DataFrame class). It
# is often useful to filter the data according to some criterion.
# 
# Compatibility with Python 2.6 and above
# ---------------------------------------
# 
# Starting with Python 2.6, the sets module is deprecated, in order to get
# rid of the warning, replace
# 
# <codecell>

Example #14

0

Show file

from DataFrame import *

# Demo script: exercises copy, add, sub, transform, dropNA and showDF of the
# project's DataFrame class on frames built from "example.csv".
x = DataFrame("example.csv")
# Four copies of the freshly loaded frame
y = x.copy()
z = x.copy()
d = x.copy()
Z = x.copy()

x.add(y)
y.sub(x)
x.transform("x + 1")

y.showDF()

z.add(d)
y.add(x)
x.add(Z)
x.showDF()

y.add(x)
x.add(y)

y.dropNA(0)

# Repeated alternating adds between x and y
y.add(x)
x.add(y)
y.add(x)
x.add(y)

Example #15

0

Show file

from pandas import DataFrame
from sklearn import linear_model
import matplotlib.pyplot as plt
import statsmodels.api as sm

# Two predictors (X1, X2) and one response (Y1)
my_data = {'X1': [0,0,10,10,20,20],
           'X2': [0,0,100,100,400,400],
           'Y1': [5,7,15,17,9,11]
           }
# The column list has to name the response 'Y1', matching the data above;
# with 'Y' the frame had an empty 'Y' column and no 'Y1' column at all.
df = DataFrame(my_data,columns=['X1','X2','Y1'])

print (df)

# Response against the first predictor
plt.scatter(df['X1'], df['Y1'], color='red')
plt.title('X1 Vs Y1', fontsize=14)
plt.xlabel('X1', fontsize=14)
plt.ylabel('Y1', fontsize=14)
plt.grid(True)
plt.show()

# Response against the second predictor
plt.scatter(df['X2'], df['Y1'], color='green')
plt.title(' X2 Vs Y1', fontsize=14)
plt.xlabel('X2', fontsize=14)
plt.ylabel('Y1', fontsize=14)
plt.grid(True)
plt.show()


# Linear regression of Y1 on X1 and X2
X = df[['X1','X2']]
Y = df['Y1']
reg = linear_model.LinearRegression()
reg.fit(X, Y)
print('Intercept: \n', reg.intercept_)

Example #16

0

Show file

File: Q3.py Project: RukhsarAkhtar/MYaseen208

# `from` had lost its first letter, which made the whole script a syntax error.
from pandas import DataFrame
from sklearn import linear_model
import matplotlib.pyplot as plt
import statsmodels.api as sm

# Two predictors (P, L) and one response (Y1)
my_data = {'P': [30,10,60,40,20,40,50,30],
           'L': [10,0,45,65,70,30,40,20],
           'Y1': [15,8,23,53,55,37,29,16]
           }
df = DataFrame(my_data,columns=['P','L','Y1'])

print (df)

# Response against the first predictor
plt.scatter(df['P'], df['Y1'], color='red')
plt.title('P Vs Y1', fontsize=14)
plt.xlabel('P', fontsize=14)
plt.ylabel('Y1', fontsize=14)
plt.grid(True)
plt.show()

# Response against the second predictor
plt.scatter(df['L'], df['Y1'], color='green')
plt.title(' L Vs Y1', fontsize=14)
plt.xlabel('L', fontsize=14)
plt.ylabel('Y1', fontsize=14)
plt.grid(True)
plt.show()


# Linear regression of Y1 on P and L
X = df[['P','L']]
Y = df['Y1']
reg = linear_model.LinearRegression()
reg.fit(X, Y)
print('Intercept: \n', reg.intercept_)

Example #17

0

Show file

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [32]: s2.index.name = 'index'

In [33]: s2.index
Out[33]: Index(['a', 'b', 'c', 'd', 'e'], dtype='object', name='index')
以上代码中先后创建了 s1 和 s2 两个序列，前一个使用了默认的整数索引，后一个使用了我们指定的字符索引，同时我们还可以对索引进行命名。

DataFrame 类似于二维数组，有行和列之分。除了像 Series 一样每一行都有索引之外，每一列还可以有标签（label），索引和标签本身都可以被命名：

In [73]: df = DataFrame(np.random.randn(4, 4), index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

In [74]: df
Out[74]:
          A         B         C         D
a -0.112607  0.563528 -0.314797 -1.972133
b -1.378539 -0.939139  0.757630 -0.307336
c  0.866185 -2.155719 -1.485602 -0.344602
d -0.253973 -1.753680 -0.062741  0.911882

In [75]: df.index
Out[75]: Index(['a', 'b', 'c', 'd'], dtype='object')

In [76]: df.columns
Out[76]: Index(['A', 'B', 'C', 'D'], dtype='object')
上面的代码中，通过指定索引和标签（columns 参数）创建了一个 DataFrame 实例。可以通过 df.index 和 df.columns 分别访问索引和标签。

Example #18

0

Show file

File: index_transform.py Project: jlehtoma/zsetup-esmk

def main():
    """Command-line entry point: validate the arguments and run process_sigmoidal.

    Reads the input/output workspaces, the parameters CSV, the link field, the
    raster format and the file-name template from the command line. Any
    invalid argument is reported through parser.error, which ends the program.
    The rasters found in the input workspace are wrapped in ParsedFileName and
    passed to process_sigmoidal together with the parameters frame.
    """
    formats = ['tif', 'img']

    parser = ArgumentParser()
    parser.add_argument("-i", "--in", dest="inws", help="Input workspace")
    parser.add_argument("-o", "--out", dest="outws", help="Output workspace")
    parser.add_argument("-p", "--parameters", dest="parameters",
                        help="Path to parameters csv file")
    parser.add_argument("-l", "--link-field", dest="link_field",
                        help="Link field in the parameters file")
    parser.add_argument("-f", "--format", dest="format", default="tif",
                        help="file format for FILENAME")
    parser.add_argument("-t", "--template", dest="template",
                        default="<BODY1>_<ID1>_<BODY2>_<ID2>_<BODY3>",
                        help="Template for file names in input workspace")

    parser.add_argument("-v", "--verbose", action="store_true", dest="verbose")

    args = parser.parse_args()

    # parser.error() exits, so every check below acts as a guard clause.
    if not args.inws:
        parser.error("Path to input workspace must be provided")
    inws = os.path.abspath(args.inws)
    if not os.path.exists(inws):
        parser.error("Input workspace {0} does not exist".format(inws))

    if not args.outws:
        parser.error("Path to output workspace must be provided")
    outws = os.path.abspath(args.outws)
    if not os.path.exists(outws):
        parser.error("Output workspace {0} does not exist".format(outws))

    if not args.parameters:
        parser.error("Path to parameters CSV file must be provided")
    parameters = os.path.abspath(args.parameters)
    if not os.path.exists(parameters):
        parser.error("Parameters file {0} does not ".format(parameters) +
                     " exist")

    if args.format not in formats:
        parser.error("Provided format must be one of: %s"
                     % ', '.join(formats))

    # List the rasters found in the input workspace
    inrasters = list_rasters(inws, [args.format], sorted=True)

    # Read the parameters file in as a DataFrame (same file that was checked above)
    parameters_df = DataFrame.read_csv(parameters,
                                       dialect=DataFrame.ZCustom)

    # Get all the field names from the generated DataFrame
    fields = parameters_df.get_fields()
    if not args.link_field:
        parser.error("No link field provided, available fields " +
                     "are: \n" + '\n'.join(fields))
    if args.link_field not in fields:
        parser.error("Link field provided not found, available fields " +
                     "are: \n" + '\n'.join(fields))
    link_field = args.link_field

    if args.verbose:
        print("\n")
        print("STARTING " + "*" * 70)
        print("Input workspace: {0}".format(inws))
        print("Output workspace: {0}".format(outws))
        print("Parameters file: {0}".format(parameters))
        print("Format: {0}".format(args.format))
        print("File name template: {0}".format(args.template))
        if len(inrasters) > 0:
            # Floor division keeps the count an integer on Python 3.
            # NOTE(review): the count is halved -- presumably each raster is
            # listed twice by list_rasters; confirm.
            print("Following {0} rasters found ".format(len(inrasters) // 2) +
                  "in the input workspace:")
            for raster in inrasters:
                print("\t" + os.path.basename(raster))
        else:
            print("Could not find any rasters with format <" +
                  "{0}> in input workspace".format(args.format))
        print("\n")

    # Construct ParsedFileNames from the input workspace based on a template
    inrasters = [ParsedFileName(raster, args.template) for raster in inrasters]

    process_sigmoidal(inrasters, parameters_df, link_field, outws,
                      multiply=True)

Example #19

0

Show file

File: Main.py Project: AlexGLT/DS-Lab-3

# Timing accumulators, one list per benchmark and implementation.
arrOverPowerTime = []
arrOverVoltageTime = []
arrOverAmperageTime = []
arrTimeMeanTime = []

dfOverPowerTime = []
dfOverVoltageTime = []
dfOverAmperageTime = []
dfTimeMeanTime = []

# Group the accumulators by implementation (position = benchmark number - 1).
# The two groupings used to be swapped, so the arr* lists collected the
# DataFrame timings and the df* lists the array timings.
Numpy = [arrOverPowerTime, arrOverVoltageTime, arrOverAmperageTime, arrTimeMeanTime]
Pandas = [dfOverPowerTime, dfOverVoltageTime, dfOverAmperageTime, dfTimeMeanTime]

# Data sizes 10 .. 1000000
sizes = [10 ** i for i in range(1, 7)]

for size in sizes:
    df = DataFrame.dfCreate(size)
    arr = Array.arrCreate(df)

    for j in range(1, 5):
        # "a" selects the array timing, "d" the DataFrame timing
        timea = getTime("a", "{}".format(j), arr, df)
        print(timea)
        Numpy[j - 1].append(timea)
        timed = getTime("d", "{}".format(j), arr, df)
        Pandas[j - 1].append(timed)

# Row labels of the result tables: the sizes as strings
count = [str(size) for size in sizes]

overPowerTable = pd.DataFrame({"Array": Numpy[0], "DataFrame": Pandas[0],
            "Count": count}).set_index("Count")
overVoltageTable = pd.DataFrame({"Array": Numpy[1], "DataFrame": Pandas[1],
            "Count": count}).set_index("Count")

Example #20

0

Show file

File: raster_algebra.py Project: jlehtoma/esmk-misc

        
if __name__ == '__main__':
    # set the working directory
    os.chdir(os.path.dirname(__file__))
    
    datadir = r'C:\Data\Staging\Tests\input'
    outputdir = r'C:\Data\Staging\Tests\output'
    
    # read in the parameters
    
    # ESMK
    pfile = os.path.join('..', 'R', 'parameters_new.csv')
    
    # SuperMetso
    #pfile = r"H:/Data/SuperMetso/MSNFI_params.csv"
    params = DataFrame.read_csv(pfile, dialect=DataFrame.ZCustom)
    
    # Define the fields that link the CSV file to raster name template
    idfield = "IPUULAJI"
    
    # ComplexName template
    ID1 = 'puulaji'
    ID2 = 'osite' 
    
    # ESMK
    template = '<BODY1>_<ID1>_<BODY2>_<ID2>_<BODY3>'    
    
    #template = "<BODY1>_<ID1>_<BODY2>"
    
    raw_rasters = [ParsedFileName(raster, template) for raster in list_rasters(datadir, 
                                                        ['img'], sorted=True)]

Example #21

0

Show file

def huanjings(html):
    """Collect the text of every <span class="rst"> element that contains "环境".

    Each matching text is printed and appended to the returned list.

    NOTE(review): the `html` parameter is not used; the spans are read from
    the module-level `soup` object -- confirm that is intended.
    """
    matches = []
    for span in soup.find_all('span', class_="rst"):
        text = span.text
        if "环境" not in text:
            continue
        print(text)
        matches.append(text)
    return matches


print(len(huanjings(html)))

# One column per scraped field; every helper returns a list built from `html`.
df = DataFrame({
    'ID名字': names(html),
    '星级': stars(html),
    '口味': kouweis(html),
    '环境': huanjings(html),
    '服务': fuwus(html),
    '点评内容': pls(html)
})

# utf_8_sig writes a BOM in front of the CSV data.
df.to_csv("E:/python_code/self_learn/df.csv",
          index=False,
          encoding='utf_8_sig')

# The context manager saves and closes the workbook; ExcelWriter.save() no
# longer exists in pandas 2.x, and sheet_name should be passed by keyword.
with pd.ExcelWriter('output.xlsx') as writer:
    df.to_excel(writer, sheet_name='Sheet1')

Python DataFrame, rpy2 Examples