예제 #1
0
 def get_dataframe_and_info(self, df_name):
     """Rebuild a stored data frame together with its column metadata.

     Every document returned by ``self.db.dataframes.find`` for *df_name* is
     treated as one column.  From each document the method reads ``name``,
     ``type`` and ``data``; ``data`` maps a row key (text convertible with
     ``int``) to the cell value.

     Rows are ordered by the integer value of their key so that the index
     and every column line up; the index labels themselves stay the
     original key strings taken from the first column.
     NOTE(review): assumes every column carries the same set of row keys;
     this is not checked here.

     Returns:
         dict with two entries:
         'info' -- column name -> copy of the column document without
                   its 'data' entry;
         'df'   -- psDataFrame whose columns are cast to int, float or
                   str depending on the column's 'type' label.
     """
     columns = list(self.db.dataframes.find({'df_name': df_name}))
     dfdict = dict()
     rownames = []
     info = dict()
     for nr, column in enumerate(columns):
         data = column['data']
         # Replace decimal comma with '.' so the values can be cast to float.
         if column['type'] == u'liczba rzeczywista':
             data = dict((k, v.replace(',', '.')) for (k, v) in data.items())
         # Row keys are stored as text: sort them numerically, otherwise
         # '10' would come before '2' and dict order would leak into the
         # frame.
         ordered_keys = sorted(data, key=int)
         # The first column provides the index labels.
         if nr == 0:
             rownames = ordered_keys
         # Column values in row order, ready for the data frame constructor.
         dfdict[column['name']] = [data[key] for key in ordered_keys]
         # Copy everything but the data itself.
         info[column['name']] = dict(
             (key, value) for (key, value) in column.items()
             if key != 'data')
     psDf = psDataFrame(dfdict, index=rownames)
     # Cast each column to the type declared for it; anything that is not
     # flagged as integer or real is kept as text.
     for column in columns:
         name = column['name']
         if column['type'] == u'liczba całkowita':
             psDf[name] = psDf[name].astype(int)
         elif column['type'] == u'liczba rzeczywista':
             psDf[name] = psDf[name].astype(float)
         else:
             psDf[name] = psDf[name].astype(str)
     # return dictionary for views.py
     return {'info': info, 'df': psDf}
예제 #2
0
def process_data(db):
    """Load ``codebook.csv`` and ``df.csv`` into ``db.dataframes``.

    Both files are read from the current working directory as ';'-separated,
    '"'-quoted CSV.  The codebook header row is skipped; the header row of
    the data file supplies the column names of the psDataFrame built from
    the remaining rows.  The codebook reader, the frame and the fixed name
    'pierszytest' are handed to ``mongodb_prepare`` and whatever it returns
    is inserted into ``db.dataframes``.

    Raises StopIteration if either file has no header row.
    """
    # Context managers guarantee both files are closed, even on error.
    with open("codebook.csv", "r") as codebook, open("df.csv", "r") as df:
        cb_reader = csv.reader(codebook, delimiter=';', quotechar='"')
        next(cb_reader)  # skip the header of the codebook
        data_reader = csv.reader(df, delimiter=';', quotechar='"')
        data_header = next(data_reader)  # column names of the data file
        df_rows = list(data_reader)
        pandas_df = psDataFrame(df_rows, columns=data_header)
        # cb_reader still reads from the open codebook file, so everything
        # that may consume it stays inside the ``with`` block.
        insert_query = mongodb_prepare(cb_reader, pandas_df, 'pierszytest')
        db.dataframes.insert(insert_query)
예제 #3
0
def get_dataframe(df_name):
    """Export the stored data frame named *df_name* to ``df_out.csv``.

    The first document in ``db.dataframes`` whose ``df_name`` matches is
    used; its ``columns`` list holds one entry per column with a ``name``
    and a ``data`` mapping of row key (text convertible with ``int``) to
    cell value.  Rows are ordered by the integer value of their key, and
    the keys of the first column become the index labels.
    NOTE(review): assumes every column carries the same set of row keys;
    this is not checked here.

    The frame is written ';'-separated, UTF-8 encoded, with all
    non-numeric fields quoted.  A progress line "<name>  -  <position>" is
    printed for every column.

    Raises StopIteration if no matching document exists.
    """
    # Query by the requested name (previously hard-coded to 'pierszytest').
    cursor = db.dataframes.find({'df_name': df_name})
    df = next(cursor)
    dfdict = dict()
    rownames = []
    for nr, column in enumerate(df['columns']):
        print("%s  -  %s" % (column['name'], nr))
        data = column['data']
        # Row keys are stored as text: sort them numerically so the index
        # and the values of every column share one row order.
        ordered_keys = sorted(data, key=int)
        if nr == 0:
            rownames = ordered_keys
        dfdict[column['name']] = [data[key] for key in ordered_keys]

    pandas_df = psDataFrame(dfdict, index=rownames)
    # float_format="." was dropped: it is not a valid %-format and raises
    # TypeError as soon as a float cell has to be written.
    pandas_df.to_csv("df_out.csv", sep=";", quoting=csv.QUOTE_NONNUMERIC,
                     encoding="utf-8")
예제 #4
0
    def process_data(self, df_name, codebook, df):
        """Parse a codebook and a data file and store the data column-wise.

        Args:
            df_name: name under which the data frame is stored.
            codebook: iterable of text lines (e.g. an open file) holding the
                codebook as ';'-separated, '"'-quoted CSV; its header row is
                skipped.
            df: iterable of text lines holding the data in the same CSV
                dialect; its header row supplies the column names.

        The data rows are wrapped in a psDataFrame and passed, together with
        the codebook reader (positioned after its header) and *df_name*, to
        ``self.mongodb_insert_columns``.

        Raises:
            StopIteration: if *codebook* or *df* has no header row.
            Whatever ``self.is_dfname_unique`` raises; it is expected to
            raise a ValidationError when *df_name* already exists.

        TODO find a way to chop df into columns w/o pandas and make a json
        object directly keeping row numbers.
        """
        # Reject duplicate names before any parsing work is done.
        self.is_dfname_unique(df_name)

        cb_reader = csv.reader(codebook, delimiter=';', quotechar='"')
        # The builtin next() works on both Python 2 and 3 readers.
        next(cb_reader)  # skip the header of the codebook
        data_reader = csv.reader(df, delimiter=';', quotechar='"')
        data_header = next(data_reader)  # column names of the data file
        df_rows = list(data_reader)
        pandas_df = psDataFrame(df_rows, columns=data_header)
        self.mongodb_insert_columns(cb_reader, pandas_df, df_name)
예제 #5
0
    def process_data(self, df_name, codebook, df):
        """Parse a codebook and a data file and store the data column-wise.

        Args:
            df_name: name under which the data frame is stored.
            codebook: iterable of text lines (e.g. an open file) holding the
                codebook as ';'-separated, '"'-quoted CSV; its header row is
                skipped.
            df: iterable of text lines holding the data in the same CSV
                dialect; its header row supplies the column names.

        The data rows are wrapped in a psDataFrame and passed, together with
        the codebook reader (positioned after its header) and *df_name*, to
        ``self.mongodb_insert_columns``.

        Raises:
            StopIteration: if *codebook* or *df* has no header row.
            Whatever ``self.is_dfname_unique`` raises; it is expected to
            raise a ValidationError when *df_name* already exists.

        TODO find a way to chop df into columns w/o pandas and make a json
        object directly keeping row numbers.
        """
        # Reject duplicate names before any parsing work is done.
        self.is_dfname_unique(df_name)

        cb_reader = csv.reader(codebook, delimiter=';', quotechar='"')
        # The builtin next() works on both Python 2 and 3 readers.
        next(cb_reader)  # skip the header of the codebook
        data_reader = csv.reader(df, delimiter=';', quotechar='"')
        data_header = next(data_reader)  # column names of the data file
        df_rows = list(data_reader)
        pandas_df = psDataFrame(df_rows, columns=data_header)
        self.mongodb_insert_columns(cb_reader, pandas_df, df_name)
예제 #6
0
 def get_dataframe_and_info(self, df_name):
     """Rebuild a stored data frame together with its column metadata.

     Every document returned by ``self.db.dataframes.find`` for *df_name* is
     treated as one column.  From each document the method reads ``name``,
     ``type`` and ``data``; ``data`` maps a row key (text convertible with
     ``int``) to the cell value.

     Rows are ordered by the integer value of their key so that the index
     and every column line up; the index labels themselves stay the
     original key strings taken from the first column.
     NOTE(review): assumes every column carries the same set of row keys;
     this is not checked here.

     Returns:
         dict with two entries:
         'info' -- column name -> copy of the column document without
                   its 'data' entry;
         'df'   -- psDataFrame whose columns are cast to int, float or
                   str depending on the column's 'type' label.
     """
     columns = list(self.db.dataframes.find({'df_name': df_name}))
     dfdict = dict()
     rownames = []
     info = dict()
     for nr, column in enumerate(columns):
         data = column['data']
         # Replace decimal comma with '.' so the values can be cast to float.
         if column['type'] == u'liczba rzeczywista':
             data = dict((k, v.replace(',', '.')) for (k, v) in data.items())
         # Row keys are stored as text: sort them numerically, otherwise
         # '10' would come before '2' and dict order would leak into the
         # frame.
         ordered_keys = sorted(data, key=int)
         # The first column provides the index labels.
         if nr == 0:
             rownames = ordered_keys
         # Column values in row order, ready for the data frame constructor.
         dfdict[column['name']] = [data[key] for key in ordered_keys]
         # Copy everything but the data itself.
         info[column['name']] = dict(
             (key, value) for (key, value) in column.items()
             if key != 'data')
     psDf = psDataFrame(dfdict, index=rownames)
     # Cast each column to the type declared for it; anything that is not
     # flagged as integer or real is kept as text.
     for column in columns:
         name = column['name']
         if column['type'] == u'liczba całkowita':
             psDf[name] = psDf[name].astype(int)
         elif column['type'] == u'liczba rzeczywista':
             psDf[name] = psDf[name].astype(float)
         else:
             psDf[name] = psDf[name].astype(str)
     # return dictionary for views.py
     return {'info': info, 'df': psDf}