def get_dataframe_and_info(self, df_name):
    """Load a stored data frame and its per-column metadata from MongoDB.

    Every document returned by ``self.db.dataframes.find`` for ``df_name``
    is treated as one column. A column document is expected to carry
    ``'name'``, ``'type'`` and ``'data'``, where ``'data'`` maps row keys
    (strings holding integers) to cell values.

    :param df_name: value of the ``df_name`` field to query for.
    :returns: a dict with two entries: ``'info'`` maps each column name to
        its document without the ``'data'`` entry, and ``'df'`` is the
        assembled ``psDataFrame`` with rows ordered by integer row key.
    """
    columns = list(self.db.dataframes.find({'df_name': df_name}))
    dfdict = dict()
    rownames = []
    info = dict()

    for nr, column in enumerate(columns):
        data = column['data']

        # Row labels come from the first column. They keep their original
        # (string) form but are ordered numerically, not in dict order.
        if nr == 0:
            rownames = sorted(data, key=int)

        # Replace decimal comma with a decimal point so that the values can
        # be converted to float below.
        if column['type'] == u'liczba rzeczywista':
            data = {key: value.replace(',', '.')
                    for key, value in data.items()}

        # Row keys are stored as strings; order the values by their integer
        # value so that every column lines up with ``rownames``.
        rows = {int(key): value for key, value in data.items()}
        dfdict[column['name']] = [rows[key] for key in sorted(rows)]

        # Copy everything but the data itself.
        info[column['name']] = {key: value for key, value in column.items()
                                if key != 'data'}

    psDf = psDataFrame(dfdict, index=rownames)

    # If the codebook states that a column is numeric, convert it to the
    # matching numeric type; everything else becomes a string column.
    # ``columns`` is a list of column documents, so iterate it directly.
    for column in columns:
        name = column['name']
        if column['type'] == u'liczba całkowita':
            psDf[name] = psDf[name].astype(int)
        elif column['type'] == u'liczba rzeczywista':
            psDf[name] = psDf[name].astype(float)
        else:
            psDf[name] = psDf[name].astype(str)

    # Return a dictionary for views.py.
    return {'info': info, 'df': psDf}
def process_data(db):
    """Read ``codebook.csv`` and ``df.csv`` and store the data in MongoDB.

    Both files are read from the current working directory as
    ``;``-separated CSV with ``"`` as the quote character. The data file is
    turned into a ``psDataFrame`` (first row = column names), combined with
    the codebook by ``mongodb_prepare`` under the name ``'pierszytest'`` and
    the result is inserted into ``db.dataframes``.

    :param db: database object exposing a ``dataframes`` collection.
    """
    # ``with`` guarantees both files are closed, even if parsing fails.
    with open("codebook.csv", "r") as codebook, open("df.csv", "r") as df:
        cb_reader = csv.reader(codebook, delimiter=';', quotechar='"')
        next(cb_reader)  # skip the header of the codebook

        data_reader = csv.reader(df, delimiter=";", quotechar='"')
        data_header = next(data_reader)  # column names
        df_rows = list(data_reader)
        pandas_df = psDataFrame(df_rows, columns=data_header)

        # ``cb_reader`` reads lazily from ``codebook``, so it has to be
        # consumed while the file is still open.
        insert_query = mongodb_prepare(cb_reader, pandas_df, 'pierszytest')
        db.dataframes.insert(insert_query)
def get_dataframe(df_name):
    """Export the stored data frame called ``df_name`` to ``df_out.csv``.

    The first document in the module-level ``db.dataframes`` collection
    whose ``df_name`` matches is loaded. Each entry of its ``'columns'``
    list must carry ``'name'`` and ``'data'``, where ``'data'`` maps row
    keys (strings holding integers) to cell values. Rows are written in
    ascending integer-key order.

    :param df_name: value of the ``df_name`` field to query for.
    :raises StopIteration: if no document matches ``df_name``.
    """
    # Query for the requested name instead of a hard-coded one.
    cursor = db.dataframes.find({'df_name': df_name})
    df = next(cursor)
    dfdict = dict()
    rownames = []

    for nr, column in enumerate(df['columns']):
        # Progress output; the single-argument form prints the same text
        # on Python 2 and Python 3.
        print("%s  -  %s" % (column['name'], nr))

        # Row labels come from the first column, ordered numerically.
        if nr == 0:
            rownames = sorted(column['data'], key=int)

        # Order the values by integer row key so they line up with
        # ``rownames`` regardless of dict iteration order.
        rows = {int(key): value for key, value in column['data'].items()}
        dfdict[column['name']] = [rows[key] for key in sorted(rows)]

    pandas_df = psDataFrame(dfdict, index=rownames)
    # NOTE(review): float_format="." is not a valid %-format; it looks like
    # ``decimal="."`` was intended. It is left untouched because it only
    # matters when a column holds real floats - confirm and fix separately.
    pandas_df.to_csv("df_out.csv", sep=";", quoting=csv.QUOTE_NONNUMERIC,
                     encoding="utf-8", float_format=".")
def process_data(self, df_name, codebook, df):
    """Parse a codebook and a data file and store them under ``df_name``.

    Both inputs are read as ``;``-separated CSV with ``"`` as the quote
    character. The codebook header row is skipped; the first row of the
    data file supplies the column names of the ``psDataFrame`` built from
    the remaining rows. The codebook reader and the data frame are then
    handed to ``self.mongodb_insert_columns``.

    TODO: find a way to chop ``df`` into columns without pandas and build
    the JSON object directly while keeping row numbers.

    :param df_name: name to store the data frame under; checked first with
        ``self.is_dfname_unique``.
    :param codebook: iterable of CSV lines describing the columns.
    :param df: iterable of CSV lines holding the data, header row first.
    :raises StopIteration: if ``codebook`` or ``df`` yields no lines.
    """
    # Check whether df_name already exists before doing any parsing.
    self.is_dfname_unique(df_name)

    cb_reader = csv.reader(codebook, delimiter=';', quotechar='"')
    next(cb_reader)  # skip the header of the codebook

    data_reader = csv.reader(df, delimiter=';', quotechar='"')
    data_header = next(data_reader)  # column names
    df_rows = list(data_reader)

    pandas_df = psDataFrame(df_rows, columns=data_header)
    self.mongodb_insert_columns(cb_reader, pandas_df, df_name)
def process_data(self, df_name, codebook, df):
    """Convert an uploaded codebook and data file and insert them into MongoDB.

    ``codebook`` and ``df`` are parsed as CSV using ``;`` as the delimiter
    and ``"`` as the quote character. The first codebook row (its header)
    is discarded. The first data row is used as the column names of a
    ``psDataFrame`` holding all remaining data rows. The still-unread
    codebook reader, the data frame and ``df_name`` are passed on to
    ``self.mongodb_insert_columns``.

    TODO: find a way to chop ``df`` into columns without pandas and build
    the JSON object directly while keeping row numbers.

    :param df_name: name for the new data frame; ``self.is_dfname_unique``
        is called with it before anything is parsed.
    :param codebook: iterable yielding the codebook's CSV lines.
    :param df: iterable yielding the data CSV lines, header first.
    :raises StopIteration: if either input has no header line.
    """
    # Uniqueness check comes first so nothing is parsed for a taken name.
    self.is_dfname_unique(df_name)

    csv_options = {'delimiter': ';', 'quotechar': '"'}

    cb_reader = csv.reader(codebook, **csv_options)
    next(cb_reader)  # drop the codebook header

    data_reader = csv.reader(df, **csv_options)
    data_header = next(data_reader)  # header row = column names
    pandas_df = psDataFrame(list(data_reader), columns=data_header)

    self.mongodb_insert_columns(cb_reader, pandas_df, df_name)
def get_dataframe_and_info(self, df_name):
    """Build a data frame plus column metadata for ``df_name`` from MongoDB.

    ``self.db.dataframes.find`` is queried for ``df_name`` and every
    returned document is handled as one column. Each document must provide
    ``'name'``, ``'type'`` and ``'data'``; ``'data'`` maps row keys (strings
    holding integers) to cell values.

    :param df_name: value of the ``df_name`` field to query for.
    :returns: ``{'info': info, 'df': frame}``. ``info`` maps each column
        name to its document with the ``'data'`` entry left out. ``frame``
        is a ``psDataFrame`` whose rows follow ascending integer row key
        and whose columns are cast according to their declared type.
    """
    column_docs = list(self.db.dataframes.find({'df_name': df_name}))
    dfdict = dict()
    info = dict()
    rownames = []

    for position, doc in enumerate(column_docs):
        cells = doc['data']

        # The first column supplies the row labels. Labels stay in their
        # stored form but are sorted by integer value, not by dict order.
        if position == 0:
            rownames = sorted(cells, key=int)

        # Decimal commas must become decimal points before the float cast.
        if doc['type'] == u'liczba rzeczywista':
            cells = {key: value.replace(',', '.')
                     for key, value in cells.items()}

        # Sort each column by integer row key so it matches ``rownames``.
        by_row = {int(key): value for key, value in cells.items()}
        dfdict[doc['name']] = [by_row[key] for key in sorted(by_row)]

        # Metadata is the column document without the bulky data mapping.
        info[doc['name']] = {key: value for key, value in doc.items()
                             if key != 'data'}

    psDf = psDataFrame(dfdict, index=rownames)

    # Cast columns the codebook declares numeric; all others become str.
    # The query result is a list of column documents, so loop over it
    # directly rather than indexing it with a 'columns' key.
    for doc in column_docs:
        name = doc['name']
        if doc['type'] == u'liczba całkowita':
            psDf[name] = psDf[name].astype(int)
        elif doc['type'] == u'liczba rzeczywista':
            psDf[name] = psDf[name].astype(float)
        else:
            psDf[name] = psDf[name].astype(str)

    # Return a dictionary for views.py.
    return {'info': info, 'df': psDf}