def validate_import_file(file_path):
    """
    Validate an import csv file against the required fields.

    'name' and 'corporate_membership_type' are required fields.

    Returns a 2-tuple:
        (bool - whether every required field is present,
         list - the required field names that are missing)
    """
    normalize_newline(file_path)
    data = csv.reader(default_storage.open(file_path, mode='rU'))
    fields = next(data)  # header row
    fields = [smart_str(field) for field in fields]

    # Slugify the headers so variants like "Name" match the required key.
    corp_memb_keys = [slugify(cm) for cm in fields]
    required = ('name','corporate_membership_type')
    requirements = [r in corp_memb_keys for r in required]
    # BUGFIX: derive the missing list from the same slugified keys used for
    # the requirements check. Previously it checked the raw headers, so a
    # header such as "Name" satisfied `requirements` yet was still reported
    # as missing - the flag and the list could disagree.
    missing_required_fields = [r for r in required if r not in corp_memb_keys]

    return all(requirements), missing_required_fields
def validate_import_file(file_path):
    """
    Validate an import csv file against the required fields.

    'name' and 'corporate_membership_type' are required fields.

    Returns a 2-tuple:
        (bool - whether every required field is present,
         list - the required field names that are missing)
    """
    normalize_newline(file_path)
    data = csv.reader(default_storage.open(file_path, mode='rU'))
    fields = next(data)  # header row
    fields = [smart_str(field) for field in fields]

    # Slugify the headers so variants like "Name" match the required key.
    corp_memb_keys = [slugify(cm) for cm in fields]
    required = ('name', 'corporate_membership_type')
    requirements = [r in corp_memb_keys for r in required]
    # BUGFIX: derive the missing list from the same slugified keys used for
    # the requirements check. Previously it checked the raw headers, so a
    # header such as "Name" satisfied `requirements` yet was still reported
    # as missing - the flag and the list could disagree.
    missing_required_fields = [r for r in required if r not in corp_memb_keys]

    return all(requirements), missing_required_fields
def memb_import_parse_csv(mimport):
    """
    Parse csv data into a dictionary.

    Returns a 2-tuple:
        (list - normalized field names from the header row,
         list - one dict per data row mapping field name -> cell value)
    """
    normalize_newline(mimport.upload_file.name)
    csv_reader = csv.reader(
        default_storage.open(mimport.upload_file.name, 'rb'))
    fieldnames = next(csv_reader)  # header row
    fieldnames = normalize_field_names(fieldnames)

    data_list = []
    for row in csv_reader:
        # BUGFIX: skip blank lines (consistent with user_import_parse_csv);
        # previously each blank line produced a useless empty dict.
        if not row:
            continue
        data_list.append(dict(zip(fieldnames, row)))

    return fieldnames, data_list
def user_import_parse_csv(mimport):
    """
    Parse the uploaded csv file into a list of row dictionaries.

    Returns a 2-tuple:
        (list - normalized field names from the header row,
         list - one dict per non-blank data row mapping
                field name -> cell value)
    """
    upload_path = mimport.upload_file.name
    normalize_newline(upload_path)
    reader = csv.reader(default_storage.open(upload_path, 'rb'))

    # First row is the header; normalize it before using as dict keys.
    headers = normalize_field_names(next(reader))

    # Blank rows are skipped entirely.
    records = [dict(zip(headers, row)) for row in reader if row]

    return headers, records
def csv_to_dict(file_path, **kwargs):
    """
    Return the csv file at ``file_path`` as a list of dicts, one per data
    row, keyed by the column headers taken from the first row.

    kwargs:
        machine_name - when True, slugify the headers (with hyphens
                       stripped) before using them as keys.
    """
    machine_name = kwargs.get('machine_name', False)

    # null byte; assume xls; not csv
    if has_null_byte(file_path):
        return []

    normalize_newline(file_path)
    reader = csv.reader(default_storage.open(file_path, 'rU'))

    headers = next(reader)  # row 1;
    if machine_name:
        headers = [slugify(h).replace('-', '') for h in headers]

    indexes = xrange(len(headers))

    # Make header names unique: every time a name collides with another,
    # rename the second of the pair by appending "-<n>".
    dup_counts = {}
    for a in indexes:
        for b in indexes:
            # compare each header against every other header
            if a != b and headers[a] == headers[b]:
                n = dup_counts.get(headers[a], 0) + 1
                dup_counts[headers[a]] = n
                headers[b] = headers[b] + "-" + str(n)

    results = []
    for row in reader:
        last = len(row) - 1
        record = {}
        for idx in indexes:
            if idx > last:
                break  # row is shorter than the header; move to next row
            record[headers[idx]] = row[idx]
        results.append(record)

    return results  # list of dictionaries
def extract_from_excel(file_path):
    """
    Extract tabular data from a .csv or .xls file into a list of dicts.

    Each dict maps a column header (from the first row) to the cell value
    and carries a "ROW_NUM" entry (1-based, counting the header as row 1).

    Raises NameError if the file does not exist or does not end in
    .csv/.xls.
    """
    if not default_storage.exists(file_path):
        raise NameError("%s is not a valid file." % file_path)

    file_ext = (file_path[-4:]).lower()
    if file_ext != ".csv" and file_ext != ".xls":
        raise NameError("%s is not a valid file type (should be either .csv or .xls)." % file_path)

    fields = []
    data_list = []

    if file_ext == ".csv":
        import csv
        import dateutil.parser as dparser

        normalize_newline(file_path)
        data = csv.reader(default_storage.open(file_path, "rU"))

        # read the column header
        fields = data.next()
        fields = [smart_str(field) for field in fields]

        r = 1
        for row in data:
            item = dict(zip(fields, row))
            for key in item.keys():
                if key in field_type_dict and field_type_dict[key] == "DateTimeField":
                    # BUGFIX: dparser.parser() is the parser-class
                    # constructor (it treats the argument as a parserinfo);
                    # dparser.parse() is the function that actually parses
                    # the string into a datetime.
                    item[key] = dparser.parse(item[key])
            item["ROW_NUM"] = r + 1
            data_list.append(item)
            r += 1
    else:
        book = xlrd.open_workbook(file_path)
        nsheets = book.nsheets
        nrows = book.sheet_by_index(0).nrows

        # get the fields from the first row
        for i in range(0, nsheets):
            sh = book.sheet_by_index(i)
            for c in range(0, sh.ncols):
                col_item = sh.cell_value(rowx=0, colx=c)
                fields.append(smart_str(col_item))

        # get the data - skip the first row
        for r in range(1, nrows):
            row = []
            for i in range(0, nsheets):
                sh = book.sheet_by_index(i)
                for c in range(0, sh.ncols):
                    cell = sh.cell(r, c)
                    cell_value = cell.value
                    if cell.ctype == xlrd.XL_CELL_DATE:
                        date_tuple = xlrd.xldate_as_tuple(cell_value, book.datemode)
                        cell_value = datetime.date(date_tuple[0], date_tuple[1], date_tuple[2])
                    elif cell.ctype in (2, 3) and int(cell_value) == cell_value:
                        # so for zipcode 77079,
                        # we don't end up with 77079.0
                        cell_value = int(cell_value)
                    row.append(cell_value)
            item = dict(zip(fields, row))
            item["ROW_NUM"] = r + 1
            data_list.append(item)

    return data_list
def extract_from_excel(file_path):
    """
    Extract tabular data from a .csv or .xls file into a list of dicts.

    Each dict maps a column header (from the first row) to the cell value
    and carries a 'ROW_NUM' entry (1-based, counting the header as row 1).

    Raises NameError if the file does not exist or does not end in
    .csv/.xls.
    """
    if not default_storage.exists(file_path):
        raise NameError("%s is not a valid file." % file_path)

    file_ext = (file_path[-4:]).lower()
    if file_ext != '.csv' and file_ext != '.xls':
        raise NameError(
            "%s is not a valid file type (should be either .csv or .xls)."
            % file_path
        )

    fields = []
    data_list = []

    if file_ext == '.csv':
        import csv
        import dateutil.parser as dparser

        normalize_newline(file_path)
        data = csv.reader(default_storage.open(file_path, 'rU'))

        # read the column header
        fields = data.next()
        fields = [smart_str(field) for field in fields]

        r = 1
        for row in data:
            item = dict(zip(fields, row))
            for key in item.keys():
                if key in field_type_dict and \
                        field_type_dict[key] == 'DateTimeField':
                    # BUGFIX: dparser.parser() is the parser-class
                    # constructor (it treats the argument as a parserinfo);
                    # dparser.parse() is the function that actually parses
                    # the string into a datetime.
                    item[key] = dparser.parse(item[key])
            item['ROW_NUM'] = r + 1
            data_list.append(item)
            r += 1
    else:
        book = xlrd.open_workbook(file_path)
        nsheets = book.nsheets
        nrows = book.sheet_by_index(0).nrows

        # get the fields from the first row
        for i in range(0, nsheets):
            sh = book.sheet_by_index(i)
            for c in range(0, sh.ncols):
                col_item = sh.cell_value(rowx=0, colx=c)
                fields.append(smart_str(col_item))

        # get the data - skip the first row
        for r in range(1, nrows):
            row = []
            for i in range(0, nsheets):
                sh = book.sheet_by_index(i)
                for c in range(0, sh.ncols):
                    cell = sh.cell(r, c)
                    cell_value = cell.value
                    if cell.ctype == xlrd.XL_CELL_DATE:
                        date_tuple = xlrd.xldate_as_tuple(
                            cell_value, book.datemode)
                        cell_value = datetime.date(date_tuple[0],
                                                   date_tuple[1],
                                                   date_tuple[2])
                    elif cell.ctype in (2, 3) \
                            and int(cell_value) == cell_value:
                        # so for zipcode 77079,
                        # we don't end up with 77079.0
                        cell_value = int(cell_value)
                    row.append(cell_value)
            item = dict(zip(fields, row))
            item['ROW_NUM'] = r + 1
            data_list.append(item)

    return data_list