def aggregate(*columns, fill_empty: bool = False, fill_value=None):
    """
    Aggregate the given columns into a new Dataset.

    :param columns: columns to add
    :param fill_empty: fill the empty fields of shorter columns with the
        "fill_value" argument
    :param fill_value: value used to fill empty fields when "fill_empty"
        is specified; may be a callable
    :return: Dataset
    """
    if len(columns) >= 2:
        new_data = Dataset()
        # Check that all columns have the same length
        last_list = columns[0]
        for list_ in columns[1:]:
            if fill_empty:
                # NOTE: padding assumes each later column is no longer than
                # the previous one; a longer column would loop forever here
                while len(last_list) != len(list_):
                    list_.append(
                        fill_value() if callable(fill_value) else fill_value)
            else:
                if len(last_list) != len(list_):
                    raise InvalidDimensions(
                        'the columns are not the same length')
            last_list = list_
        # Aggregate the columns
        for column in columns:
            new_data.append_col(column)
        return new_data
    else:
        raise ReportDataError('you can only aggregate two or more columns')

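# A minimal usage sketch for aggregate() above, assuming Dataset,
# InvalidDimensions and ReportDataError come from the surrounding library.
# The column values are illustrative only.
names = ['Ada', 'Grace', 'Alan']
ages = [36, 85]

# The shorter "ages" column is padded with None until it matches "names".
merged = aggregate(names, ages, fill_empty=True, fill_value=None)
print(merged.height)  # 3

# Without fill_empty, mismatched lengths raise InvalidDimensions instead.
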
def crear_multiple(request):
    accionOk = None
    accionReturn = None
    instituciones = Institucion.objects.filter(activo=True)
    if request.method == 'POST':
        try:
            institucion = request.POST['institucion_select']
            datos_resource = DeportistaResource()
            dataset = Dataset()
            nuevos_datos = request.FILES['deportistasxlsfile']
            dataset.load(nuevos_datos.read())
            # Append the selected institution as a constant column
            institucion_col = [institucion for _ in range(dataset.height)]
            dataset.append_col(institucion_col, header="institucion")
            datos_resource.import_data(dataset, dry_run=False)
            accionOk = True
            accionReturn = "Los deportistas han sido creados correctamente."
        except Exception as e:
            accionOk = False
            accionReturn = "Ocurrió un error al crear los deportistas. Error: {}".format(e)
    c = {
        'usuario_logueado': True,
        'usuario': request.user.username,
        'nav_id': 'nav_deportistas',
        'accionOk': accionOk,
        'accionReturn': accionReturn,
        'instituciones': instituciones,
    }
    return render(request, 'deportista/deportista_crear_multiple.html', context=c)

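# Sketch of the same column-append step in isolation, assuming tablib.
# append_col also accepts a callable that is evaluated once per row (the
# pattern used by TransTab.new_col further below), which avoids building
# the intermediate list. 'institucion-1' stands in for the selected value.
from tablib import Dataset

ds = Dataset(['a', 1], ['b', 2], headers=['letra', 'numero'])
ds.append_col(lambda row: 'institucion-1', header='institucion')
print(ds.headers)  # ['letra', 'numero', 'institucion']
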
def export_to_csv(vals, filename):
    data = Dataset()
    data.append_col(vals[0], header='Date')
    data.append_col(vals[1], header='Total People')
    # Use a context manager so the file handle is always closed
    with open(filename, 'w') as file:
        file.write(data.csv)

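# Hedged usage sketch for export_to_csv above: vals is assumed to be a pair
# of equal-length columns (dates first, then people counts); the values are
# illustrative.
dates = ['2023-01-01', '2023-01-02']
totals = [128, 97]
export_to_csv([dates, totals], 'people.csv')
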
def test_file_to_dataset_incorrect(self):
    """If the input file cannot be decoded, an error is returned."""
    dataset = Dataset()
    dataset.append_col(['row1', 'row2'])
    dataset.headers = ['col1']
    encoded_dataset = dataset.export('csv').encode('utf-16')
    bio = io.BytesIO(encoded_dataset)
    uploaded_file = self.create_uploaded_file(file=bio)
    uploaded_dataset, error_msg = self.file_to_dataset(uploaded_file)
    self.assertIsNone(uploaded_dataset)
    self.assertIsNotNone(error_msg)

def test_file_to_dataset_correct(self):
    """If the input file can be decoded, the correct dataset is returned."""
    dataset = Dataset()
    dataset.append_col(['row1', 'row2'])
    dataset.headers = ['col1']
    encoded_dataset = dataset.export('csv').encode('utf-8')
    bio = io.BytesIO(encoded_dataset)
    uploaded_file = self.create_uploaded_file(file=bio)
    uploaded_dataset, error_msg = self.file_to_dataset(uploaded_file)
    self.assertEqual(uploaded_dataset.dict, dataset.dict)
    self.assertEqual(uploaded_dataset.headers, dataset.headers)
    self.assertIsNone(error_msg)

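# A minimal sketch of the helper these two tests exercise, under the
# assumption (not confirmed by the source) that file_to_dataset reads the
# upload, decodes it as UTF-8 CSV, and returns (dataset, error_msg) with
# exactly one of the two set:
def file_to_dataset(uploaded_file):
    """Return (dataset, None) on success or (None, error message) on failure."""
    try:
        raw = uploaded_file.read()
        dataset = Dataset().load(raw.decode('utf-8'), format='csv')
        return dataset, None
    except Exception as exc:  # e.g. UnicodeDecodeError for a utf-16 payload
        return None, str(exc)
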
def change_sheet_to_ext(songs: tablib.Dataset, path: str) -> tablib.Dataset:
    local_songs = get_local_songs(path)
    # append_col with a callable evaluates add_sheet_ext once per row; the
    # subsequent del removes the *first* column named "sheet_type", so the
    # old column is replaced by the recomputed one
    songs.append_col(add_sheet_ext, "sheet_type")
    del songs["sheet_type"]
    return songs

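# Isolated sketch of the replace-a-column pattern used above, assuming
# tablib: appending a recomputed column and then deleting by header drops
# the first column with that header, leaving the new one in place. The
# data and the '.musicxml' extension are illustrative.
import tablib

ds = tablib.Dataset(['intro', 'pdf'], headers=['title', 'sheet_type'])
ds.append_col(lambda row: row[0] + '.musicxml', header='sheet_type')
del ds['sheet_type']  # removes the original 'sheet_type' column
print(ds.headers)     # ['title', 'sheet_type']
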
def get_dataset(self):
    dataset = Dataset()
    # Prepare the file: the first column holds the row labels
    dataset.append_col([
        u('Date'),
        u('Canton'),
        u('Établissement'),
        u('Emplacement'),
        u('Heures'),
        u('Nombre de qualifs'),
        # Logistics
        u('Moniteur +'),
        u('Mauvais temps'),
        u('Pommes'),
        u('Total vélos'),
        u('Total casques'),
        u('Remarques'),
        # Qualif
        u('Classe'),
        u('Enseignant'),
        u('Moniteur 2'),
        u('Moniteur 1'),
        u('Moniteur 1'),
        u('Nombre d\'élèves'),
        u('Nombre de vélos'),
        u('Nombre de casques'),
        CATEGORY_CHOICE_A,
        CATEGORY_CHOICE_B,
        CATEGORY_CHOICE_C,
        u('Intervenant'),
        u('Remarques'),
    ])
    session = self.object
    session_place = session.place
    if not session_place:
        session_place = (
            session.address_city
            if session.address_city
            else session.organization.address_city
        )
    col = [
        date(session.day),
        session.organization.address_canton,
        session.organization.name,
        session_place,
        '%s - %s' % (time(session.begin), time(session.end)),
        session.n_qualifications,
        EXPORT_NAMETEL.format(
            name=session.superleader.get_full_name(),
            tel=session.superleader.profile.natel
        ) if session.superleader else '',
        str(session.fallback),
        session.apples,
        session.n_bikes,
        session.n_helmets,
        session.comments,
    ]
    if session.n_qualifications:
        for quali in session.qualifications.all():
            if not len(col):
                # Pad the logistics block (12 cells) for the second and
                # later qualification columns
                col = [''] * 12
            col.append(quali.name)
            col.append(
                EXPORT_NAMETEL.format(
                    name=quali.class_teacher_fullname,
                    tel=quali.class_teacher_natel
                ) if quali.class_teacher_fullname else ''
            )
            col.append(
                EXPORT_NAMETEL.format(
                    name=quali.leader.get_full_name(),
                    tel=quali.leader.profile.natel
                ) if quali.leader else ''
            )
            for i in range(2):
                try:
                    helper = quali.helpers.all()[i]
                    col.append(
                        EXPORT_NAMETEL.format(
                            name=helper.get_full_name(),
                            tel=helper.profile.natel
                        ) if helper else ''
                    )
                except IndexError:
                    col.append('')
            col.append(quali.n_participants)
            col.append(quali.n_bikes)
            col.append(quali.n_helmets)
            col.append(str(quali.activity_A) if quali.activity_A else '')
            col.append(str(quali.activity_B) if quali.activity_B else '')
            col.append(str(quali.activity_C) if quali.activity_C else '')
            col.append(
                EXPORT_NAMETEL.format(
                    name=quali.actor.get_full_name(),
                    tel=quali.actor.profile.natel
                ) if quali.actor else ''
            )
            col.append(quali.comments)
            dataset.append_col(col)
            col = []
    else:
        col += [''] * 13
        dataset.append_col(col)
    return dataset

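# Minimal sketch of the transposed layout used above, assuming tablib: the
# first append_col supplies the row labels and each later append_col adds
# one session as a column. The values are illustrative.
from tablib import Dataset

ds = Dataset()
ds.append_col(['Date', 'Canton', 'Heures'])           # label column
ds.append_col(['01.05.2023', 'VD', '08:00 - 12:00'])  # one session per column
ds.append_col(['02.05.2023', 'GE', '13:30 - 17:30'])
print(ds.width, ds.height)  # 3 3
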
def get_dataset(self):
    dataset = Dataset()
    firstcol = [
        u('Date'),
        u('Canton'),
        u('Établissement'),
        u('Emplacement'),
        u('Heures'),
        u('Nombre de qualifs'),
    ]
    # Find all the people who take part in this season
    qs = get_user_model().objects
    user_filter = [
        # Moniteurs +
        Q(sess_monplus__in=self.season.sessions_with_qualifs),
        # Moniteurs 2
        Q(qualifs_mon2__session__in=self.season.sessions_with_qualifs),
        # Moniteurs 1
        Q(qualifs_mon1__session__in=self.season.sessions_with_qualifs),
        # Intervenants
        Q(qualifs_actor__session__in=self.season.sessions_with_qualifs),
    ]
    qs = (
        qs.filter(reduce(operator.or_, user_filter))
        .distinct()
        .order_by('first_name', 'last_name')
    )
    firstcol += [user.get_full_name() for user in qs]
    dataset.append_col(firstcol)
    for session in self.season.sessions_with_qualifs:
        session_place = session.place
        if not session_place:
            session_place = (
                session.address_city
                if session.address_city
                else session.organization.address_city
            )
        col = [
            date(session.day),
            session.organization.address_canton,
            session.organization.name,
            session_place,
            '%s - %s' % (time(session.begin), time(session.end)),
            session.n_qualifications,
        ]
        for user in qs:
            label = ''
            if user == session.superleader:
                # Translators: short name for 'Moniteur +'
                label = u('M+')
            else:
                for quali in session.qualifications.all():
                    if user == quali.leader:
                        # Translators: short name for 'Moniteur 2'
                        label = u('M2')
                        break
                    elif user in quali.helpers.all():
                        # Translators: short name for 'Moniteur 1'
                        label = u('M1')
                        break
                    elif user == quali.actor:
                        # Translators: short name for 'Intervenant'
                        label = u('I')
                        break
            col += [label]
        dataset.append_col(col)
    return dataset

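# Isolated sketch of the Q-object OR pattern used above: reduce with
# operator.or_ folds a list of Q filters into a single "any of these"
# condition. The model field names here are illustrative.
import operator
from functools import reduce
from django.db.models import Q

filters = [Q(role='leader'), Q(role='helper'), Q(role='actor')]
combined = reduce(operator.or_, filters)
# equivalent to Q(role='leader') | Q(role='helper') | Q(role='actor'):
# users = User.objects.filter(combined).distinct()
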
class TransTab(object):
    def __init__(self, in_fname, format_fname, out_sheet='Sheet1', out_fname=''):
        self.in_fname = in_fname
        self.in_fname_prefix, self.in_type = os.path.splitext(in_fname)
        self.in_type = self.in_type.lstrip('.')
        self.out_fname_prefix, self.out_type = os.path.splitext(out_fname)
        self.out_type = self.out_type.lstrip('.')
        if not self.out_fname_prefix or not self.out_type:
            self.out_fname_prefix = self.in_fname_prefix + '_formatted'
            self.out_type = 'xlsx'
        self.out_sheet = out_sheet
        with open(format_fname, 'r') as f:
            self.file_format = f.read()
        sys.path.append(os.path.dirname(os.path.abspath(format_fname)))
        self.format_name = os.path.splitext(os.path.basename(format_fname))[0]
        try:
            self.format_module = importlib.import_module(self.format_name)
        except Exception:
            # No per-format custom operations module alongside the format file
            self.format_module = ''
        if self.in_type in ['xls', 'xlsx']:
            self.file_read_mode = 'rb'
        else:
            self.file_read_mode = 'r'
        with open(in_fname, self.file_read_mode) as f:
            self.data = Dataset().load(f.read())
        if self.in_type in ['xls', 'xlsx']:
            # Excel loads integral values as floats; convert them back to ints
            for i, row in enumerate(self.data):
                self.data[i] = [
                    int(n) if isinstance(n, float) and n == int(n) else n
                    for n in row
                ]

    def preprocess_dates(self, cols):
        for i, row in enumerate(self.data.dict):
            for col in cols:
                if not row[col]:
                    continue
                try:
                    if self.in_type in ['xls', 'xlsx']:
                        row[col] = xldate.xldate_as_datetime(row[col], 0)
                    else:
                        row[col] = maya.when(row[col]).datetime().replace(tzinfo=None)
                except Exception:
                    row[col] = maya.when(row[col]).datetime().replace(tzinfo=None)
            self.data[i] = list(row.values())

    def new_col(self, col_name):
        self.data.append_col(lambda x: '', header=col_name)

    def clear_col(self, col_name):
        pos = self.data.headers.index(col_name)
        self.delete_col(col_name)
        self.data.insert_col(pos, lambda x: '', header=col_name)

    def delete_rows_by_column_val(self, col_name, val):
        # An empty val matches empty cells exactly; otherwise match substrings.
        # arr is a snapshot, so deletions are offset by removed_count.
        arr = self.data[col_name]
        removed_count = 0
        for ind in range(len(arr)):
            if val == '':
                if arr[ind] == val:
                    del self.data[ind - removed_count]
                    removed_count += 1
            else:
                if val in arr[ind]:
                    del self.data[ind - removed_count]
                    removed_count += 1

    def delete_row(self, n):
        # Row numbers are 1-based and count the header row, so data row n
        # maps to index n - 2
        n = n - 2
        if n == -1:
            # Deleting the header row: promote the first data row to headers
            new_data = Dataset()
            new_data.headers = self.data[0]
            for row in self.data[1:]:
                new_data.append(row)
            self.data = new_data
        else:
            del self.data[n]

    def delete_col(self, col_name):
        del self.data[col_name]

    def drop(self):
        # Delete the last data row
        n = len(self.data) - 1
        if n == -1:
            new_data = Dataset()
            new_data.headers = self.data[0]
            for row in self.data[1:]:
                new_data.append(row)
            self.data = new_data
        else:
            del self.data[n]

    def rename_col(self, col_name, new_name):
        pos = self.data.headers.index(col_name)
        self.data.headers[pos] = new_name

    def copy_col(self, src_col, dest_col):
        if dest_col not in self.data.headers:
            self.new_col(dest_col)
        src_pos = self.data.headers.index(src_col)
        dest_pos = self.data.headers.index(dest_col)
        self.delete_col(dest_col)
        self.data.insert_col(dest_pos, lambda row: row[src_pos], header=dest_col)

    def cutpaste_col(self, src_col, dest_col):
        if dest_col not in self.data.headers:
            self.new_col(dest_col)
        self.copy_col(src_col, dest_col)
        self.clear_col(src_col)

    def concatenate_col(self, src_cols, dest_col, join_str):
        if dest_col not in self.data.headers:
            self.new_col(dest_col)
        for i, row in enumerate(self.data.dict):
            row[dest_col] = join_str.join([str(row[c]) for c in src_cols])
            self.data[i] = list(row.values())

    def replace(self, col_name, kv_map, has_default, default_val, case_insensitive):
        pos = self.data.headers.index(col_name)
        if case_insensitive:
            val = (i.lower() for i in self.data[col_name])
            kv_map = {k.lower(): v for k, v in kv_map.items()}
        else:
            val = (i for i in self.data[col_name])
        self.delete_col(col_name)
        # insert_col calls the lambda once per row, consuming the generator
        # of original values in order
        self.data.insert_col(
            pos,
            lambda row: (kv_map.get(str(next(val)), default_val)
                         if has_default else kv_map[str(next(val))]),
            header=col_name)

    def delete_duplicates(self, col_name=''):
        if not col_name:
            self.data.remove_duplicates()
        else:
            seen = set()
            pos = self.data.headers.index(col_name)
            # seen.add() returns None, so the condition keeps only the first
            # occurrence of each value in col_name
            self.data._data[:] = [
                row for row in self.data._data
                if not (row[pos] in seen or seen.add(row[pos]))
            ]

    def sum_delete_duplicates(self, sum_col, unique_col):
        unique_val_sum = {}
        unique_val_row1 = {}
        # Collect the sum of sum_col per unique_col value, and also the first
        # row number where each unique value occurs
        for i, row in enumerate(self.data.dict):
            sum_col_val = row[sum_col]
            unique_col_val = row[unique_col]
            if unique_col_val in unique_val_sum:
                unique_val_sum[unique_col_val] += sum_col_val
            else:
                unique_val_sum[unique_col_val] = sum_col_val
                unique_val_row1[unique_col_val] = i
        # Write the sum into the first-occurrence row of each unique value
        for unique_col_val in unique_val_row1:
            first_row_num = unique_val_row1[unique_col_val]
            row = self.data.dict[first_row_num]
            row[sum_col] = unique_val_sum[unique_col_val]
            self.data[first_row_num] = list(row.values())
        self.delete_duplicates(unique_col)

    def get_custom_func(self, name):
        '''
        Suppose the format is specified as <_some_path_/riceland.txt>. The
        program tries to load the custom operations definition file at
        <_some_path_/riceland.py>. There is also a global_operations.py file
        that contains custom operations definitions. If you modify that file,
        the program has to be reinstalled for the changes to be available at
        the command line.
        '''
        if hasattr(self.format_module, name):
            func_obj = getattr(self.format_module, name)
            if name in globals():
                print('{} defined both globally and for the format. '
                      'Using the format definition'.format(name))
        else:
            if name in globals():
                func_obj = globals()[name]
            else:
                print('{} is not defined'.format(name))
                sys.exit(-1)
        return func_obj

    def do_custom_operation(self, op, quit_on_error=False):
        '''
        This operates on whole rows. Done when there is an instruction of the
        kind: do <custom_op_name>

        The custom function is executed with the following parameters: the
        row dict, the row number, and a flag for whether to quit if there is
        an error. It must return the modified row dict, which is assigned
        back to the main data.
        '''
        for i, row in enumerate(self.data.dict):
            self.data[i] = list(op(row, i, quit_on_error).values())

    def do_custom_operation_col(self, op, col_name, quit_on_error=False):
        '''
        This operates on a single column. Done when there is an instruction
        of the kind: do <custom_op_name> on <col_name>

        The custom function is executed with the following parameters: the
        cell value, the row dict, the row number, the column name, and a flag
        for whether to quit if there is an error. It must return the modified
        cell value, which is assigned back to the column in the main data.
        '''
        for i, row in enumerate(self.data.dict):
            row[col_name] = op(row[col_name], row, i, col_name, quit_on_error)
            self.data[i] = list(row.values())

    def transform(self):
        for f_cmd in GRAMMAR.parseString(self.file_format):
            if f_cmd.decl == 'dates':
                self.preprocess_dates(f_cmd.cols)
            elif f_cmd.op == 'new':
                self.new_col(f_cmd.col_name)
            elif f_cmd.op == 'clear':
                self.clear_col(f_cmd.col_name)
            elif f_cmd.op == 'delete':
                if f_cmd.row:
                    self.delete_row(f_cmd.row_num)
                else:
                    self.delete_col(f_cmd.col_name)
            elif f_cmd.op == 'drop':
                self.drop()
            elif f_cmd.op == 'rename':
                self.rename_col(f_cmd.col_name, f_cmd.new_name)
            elif f_cmd.op == 'copy':
                self.copy_col(f_cmd.src_col, f_cmd.dest_col)
            elif f_cmd.op == 'cutpaste':
                self.cutpaste_col(f_cmd.src_col, f_cmd.dest_col)
            elif f_cmd.op == 'concatenate':
                self.concatenate_col(f_cmd.src_cols, f_cmd.dest_col, f_cmd.join_str)
            elif f_cmd.op == 'replace':
                self.replace(f_cmd.col_name, f_cmd.kv_map, f_cmd.has_default,
                             f_cmd.default_val, f_cmd.case_insensitive)
            elif f_cmd.op == 'delete-duplicate-rows':
                self.delete_duplicates(f_cmd.col_name)
            elif f_cmd.op == 'delete-rows-by-column-val':
                self.delete_rows_by_column_val(f_cmd.col, f_cmd.val)
            elif f_cmd.op == 'sum-col-and-delete-duplicate-rows':
                self.sum_delete_duplicates(f_cmd.sum_col, f_cmd.unique_col)
            elif f_cmd.op == 'do' and f_cmd.custom_op_name:
                func_obj = self.get_custom_func(f_cmd.custom_op_name)
                if f_cmd.col_name:
                    self.do_custom_operation_col(func_obj, f_cmd.col_name,
                                                 f_cmd.quit_on_error)
                else:
                    self.do_custom_operation(func_obj, f_cmd.quit_on_error)
            else:
                print("Don't know how to process this instruction: {}".format(f_cmd))
        self.save()

    def save(self):
        book = Databook()
        self.data.title = self.out_sheet
        book.add_sheet(self.data)
        with open(self.out_fname_prefix + '.' + self.out_type, 'wb') as f:
            f.write(book.export(self.out_type))

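# Hedged usage sketch for TransTab above, assuming an input spreadsheet and
# a format-instruction file exist at these illustrative paths (riceland.txt
# is the example named in the get_custom_func docstring). transform() runs
# every parsed instruction in order, then save() writes the result; with no
# out_fname given, that is <input prefix>_formatted.xlsx.
tab = TransTab('invoices.xls', 'formats/riceland.txt', out_sheet='Cleaned')
tab.transform()  # writes invoices_formatted.xlsx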