def test_import(self):
    if dataent.db.exists("Blog Category", "test-category"):
        dataent.delete_doc("Blog Category", "test-category")

    exporter.export_data("Blog Category", all_doctypes=True, template=True)
    content = read_csv_content(dataent.response.result)
    content.append(["", "test-category", "Test Category"])

    importer.upload(content)
    self.assertEqual(
        dataent.db.get_value("Blog Category", "test-category", "title"),
        "Test Category")

    # export with data
    exporter.export_data("Blog Category", all_doctypes=True, template=True,
        with_data=True)
    content = read_csv_content(dataent.response.result)

    # overwrite
    content[-1][3] = "New Title"
    importer.upload(content, overwrite=True)
    self.assertEqual(
        dataent.db.get_value("Blog Category", "test-category", "title"),
        "New Title")
def import_csv(context, path, only_insert=False, submit_after_import=False,
        ignore_encoding_errors=False, no_email=True):
    "Import CSV using data import"
    from dataent.core.doctype.data_import import importer
    from dataent.utils.csvutils import read_csv_content
    site = get_site(context)

    if not os.path.exists(path):
        path = os.path.join('..', path)
    if not os.path.exists(path):
        print('Invalid path {0}'.format(path))
        sys.exit(1)

    with open(path, 'r') as csvfile:
        content = read_csv_content(csvfile.read())

    dataent.init(site=site)
    dataent.connect()

    try:
        importer.upload(content, submit_after_import=submit_after_import,
            no_email=no_email, ignore_encoding_errors=ignore_encoding_errors,
            overwrite=not only_insert, via_console=True)
        dataent.db.commit()
    except Exception:
        print(dataent.get_traceback())

    dataent.destroy()
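# Hedged usage sketch for import_csv: the file path below is a placeholder,
# and the way `context` is obtained depends on the CLI wiring, which is not
# shown here. Intended only to illustrate the parameters.
def _example_import_csv(context):
    # insert-only import, suppressing notification emails
    import_csv(context, 'records.csv', only_insert=True, no_email=True)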
def get_transaction_entries(filename, headers):
    header_index = {}
    rows, transactions = [], []

    if filename.lower().endswith("xlsx"):
        from dataent.utils.xlsxutils import read_xlsx_file_from_attached_file
        rows = read_xlsx_file_from_attached_file(file_id=filename)
    elif filename.lower().endswith("csv"):
        from dataent.utils.file_manager import get_file_path
        from dataent.utils.csvutils import read_csv_content
        filepath = get_file_path(filename)
        with open(filepath, 'rb') as csvfile:
            rows = read_csv_content(csvfile.read())
    elif filename.lower().endswith("xls"):
        rows = get_rows_from_xls_file(filename)
    else:
        dataent.throw(_("Only .csv, .xls and .xlsx files are supported currently"))

    stmt_headers = headers.values()
    for row in rows:
        # skip empty rows and rows with an empty first cell
        if not row or not row[0]:
            continue
        if header_index:
            transaction = get_transaction_info(stmt_headers, header_index, row)
            transactions.append(transaction)
        elif is_headers_present(stmt_headers, row):
            header_index = get_header_index(stmt_headers, row)

    return transactions
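# Hedged usage sketch for get_transaction_entries: `headers` maps bank
# statement fields to the column headers found in the uploaded file. The
# field and header names below are illustrative, not a fixed schema.
def _example_get_transaction_entries():
    headers = {
        "transaction_date": "Date",
        "description": "Particulars",
        "debit": "Withdrawal",
        "credit": "Deposit",
    }
    # rows before the header row are skipped; rows after it become transactions
    return get_transaction_entries("statement.xlsx", headers)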
def test_export_with_data(self):
    exporter.export_data("User", all_doctypes=True, template=True,
        with_data=True)
    content = read_csv_content(dataent.response.result)
    self.assertEqual(content[1][1], "User")
    self.assertIn('"Administrator"', [c[1] for c in content if len(c) > 1])
def test_import_only_children(self):
    user_email = "*****@*****.**"
    if dataent.db.exists("User", user_email):
        dataent.delete_doc("User", user_email, force=True)

    dataent.get_doc({
        "doctype": "User",
        "email": user_email,
        "first_name": "Test Import UserRole"
    }).insert()

    exporter.export_data("Has Role", "User", all_doctypes=True, template=True)
    content = read_csv_content(dataent.response.result)
    content.append(["", "*****@*****.**", "Blogger"])

    importer.upload(content)
    user = dataent.get_doc("User", user_email)

    self.assertTrue(
        dataent.db.get_value("Has Role", filters={
            "role": "Blogger",
            "parent": user_email,
            "parenttype": "User"
        }))
    self.assertEqual(user.get("roles")[0].role, "Blogger")

    # overwrite
    exporter.export_data("Has Role", "User", all_doctypes=True, template=True)
    content = read_csv_content(dataent.response.result)
    content.append(["", "*****@*****.**", "Website Manager"])

    importer.upload(content, overwrite=True)
    user = dataent.get_doc("User", user_email)

    self.assertEqual(len(user.get("roles")), 1)
    self.assertEqual(user.get("roles")[0].role, "Website Manager")
def test_export_with_all_doctypes(self):
    exporter.export_data("User", all_doctypes="Yes", template=True,
        with_data=True)
    content = read_csv_content(dataent.response.result)
    self.assertEqual(content[1][1], "User")
    self.assertIn('"Administrator"', [c[1] for c in content if len(c) > 1])
    self.assertEqual(content[13][0], "DocType:")
    self.assertEqual(content[13][1], "User")
    self.assertIn("Has Role", content[13])
def test_import_with_children(self):  # pylint: disable=R0201
    if dataent.db.exists("Event", "EV00001"):
        dataent.delete_doc("Event", "EV00001")

    exporter.export_data("Event", all_doctypes="Yes", template=True)
    content = read_csv_content(dataent.response.result)
    content.append([None] * len(content[-2]))
    content[-1][1] = "__Test Event with children"
    content[-1][2] = "Private"
    content[-1][3] = "2014-01-01 10:00:00.000000"

    importer.upload(content)
    dataent.get_doc("Event", {"subject": "__Test Event with children"})
def import_file_by_path(path, ignore_links=False, overwrite=False,
        submit=False, pre_process=None, no_email=True):
    from dataent.utils.csvutils import read_csv_content
    print("Importing " + path)
    with open(path, "r") as infile:
        upload(rows=read_csv_content(infile.read()), ignore_links=ignore_links,
            no_email=no_email, overwrite=overwrite,
            submit_after_import=submit, pre_process=pre_process)
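# Hedged usage sketch for import_file_by_path: the path is a placeholder.
# With overwrite=True, existing records named in the file are updated in place.
def _example_import_file_by_path():
    import_file_by_path("/tmp/users.csv", overwrite=True, submit=False)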
def get_csv_contents(files_path):
    csv_content = {}
    for filepath in files_path:
        fname = os.path.basename(filepath)
        for file_type in ["account.account.template", "account.account.type",
                "account.chart.template"]:
            if fname.startswith(file_type) and fname.endswith(".csv"):
                with open(filepath, "r") as csvfile:
                    try:
                        csv_content.setdefault(file_type, [])\
                            .append(read_csv_content(csvfile.read()))
                    except Exception:
                        # skip files that cannot be parsed
                        continue
    return csv_content
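# Hedged sketch of what get_csv_contents returns, assuming one matching file
# per type: a dict keyed by file type, each value a list of parsed CSVs (one
# list of rows per file), e.g.
#
#     {"account.chart.template": [[["id", "name"], ["x", "Chart X"]]]}
def _example_get_csv_contents():
    return get_csv_contents(["/tmp/account.chart.template.standard.csv"])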
def _bulk_rename(context, doctype, path):
    "Rename multiple records via CSV file"
    from dataent.model.rename_doc import bulk_rename
    from dataent.utils.csvutils import read_csv_content

    site = get_site(context)

    with open(path, 'r') as csvfile:
        rows = read_csv_content(csvfile.read())

    dataent.init(site=site)
    dataent.connect()

    bulk_rename(doctype, rows, via_console=True)

    dataent.destroy()
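# Hedged sketch of the CSV that _bulk_rename consumes: to my understanding
# each row pairs an existing record name with its new name (see
# dataent.model.rename_doc.bulk_rename for the authoritative format), e.g.
#
#     old-item-001,new-item-001
#     old-item-002,new-item-002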
def upload_bank_statement():
    if getattr(dataent, "uploaded_file", None):
        # the uploaded file's path is assumed to end in the file extension,
        # so it doubles as the name for the extension check below
        fname = dataent.uploaded_file
        with open(dataent.uploaded_file, "rb") as upfile:
            fcontent = upfile.read()
    else:
        from dataent.utils.file_manager import get_uploaded_content
        fname, fcontent = get_uploaded_content()

    if dataent.safe_encode(fname).lower().endswith("csv".encode('utf-8')):
        from dataent.utils.csvutils import read_csv_content
        rows = read_csv_content(fcontent, False)
    elif dataent.safe_encode(fname).lower().endswith("xlsx".encode('utf-8')):
        from dataent.utils.xlsxutils import read_xlsx_file_from_attached_file
        rows = read_xlsx_file_from_attached_file(fcontent=fcontent)
    else:
        # guard against `rows` being undefined for unsupported extensions
        dataent.throw(_("Only .csv and .xlsx files are supported currently"))

    columns = rows[0]
    rows.pop(0)
    data = rows
    return {"columns": columns, "data": data}
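# Hedged sketch of the response shape of upload_bank_statement, assuming a
# two-column statement; the values are illustrative:
#
#     {"columns": ["Date", "Amount"],
#      "data": [["2019-01-01", "100.00"], ["2019-01-02", "-42.50"]]}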
def test_export(self):
    exporter.export_data("User", all_doctypes=True, template=True)
    content = read_csv_content(dataent.response.result)
    self.assertEqual(content[1][1], "User")
def upload(rows=None, submit_after_import=None, ignore_encoding_errors=False,
        no_email=True, overwrite=None, update_only=None, ignore_links=False,
        pre_process=None, via_console=False, from_data_import="No",
        skip_errors=True, data_import_doc=None, validate_template=False,
        user=None):
    """upload data"""

    # for translations
    if user:
        dataent.cache().hdel("lang", user)
        dataent.set_user_lang(user)

    if data_import_doc and isinstance(data_import_doc, string_types):
        data_import_doc = dataent.get_doc("Data Import", data_import_doc)
    if data_import_doc and from_data_import == "Yes":
        no_email = data_import_doc.no_email
        ignore_encoding_errors = data_import_doc.ignore_encoding_errors
        update_only = data_import_doc.only_update
        submit_after_import = data_import_doc.submit_after_import
        overwrite = data_import_doc.overwrite
        skip_errors = data_import_doc.skip_errors
    else:
        # extra input params
        params = json.loads(dataent.form_dict.get("params") or '{}')
        if params.get("submit_after_import"):
            submit_after_import = True
        if params.get("ignore_encoding_errors"):
            ignore_encoding_errors = True
        if not params.get("no_email"):
            no_email = False
        if params.get('update_only'):
            update_only = True
        if params.get('from_data_import'):
            from_data_import = params.get('from_data_import')
        if not params.get('skip_errors'):
            skip_errors = params.get('skip_errors')

    dataent.flags.in_import = True
    dataent.flags.mute_emails = no_email

    def get_data_keys_definition():
        return get_data_keys()

    def bad_template():
        dataent.throw(
            _("Please do not change the rows above {0}").format(
                get_data_keys_definition().data_separator))

    def check_data_length():
        if not data:
            dataent.throw(
                _("No data found in the file. Please reattach the new file with data."))

    def get_start_row():
        for i, row in enumerate(rows):
            if row and row[0] == get_data_keys_definition().data_separator:
                return i + 1
        bad_template()

    def get_header_row(key):
        return get_header_row_and_idx(key)[0]

    def get_header_row_and_idx(key):
        for i, row in enumerate(header):
            if row and row[0] == key:
                return row, i
        return [], -1

    def filter_empty_columns(columns):
        empty_cols = list(filter(lambda x: x in ("", None), columns))

        if empty_cols:
            if columns[-1 * len(empty_cols):] == empty_cols:
                # filter empty columns if they exist at the end
                columns = columns[:-1 * len(empty_cols)]
            else:
                dataent.msgprint(
                    _("Please make sure that there are no empty columns in the file."),
                    raise_exception=1)

        return columns

    def make_column_map():
        doctype_row, row_idx = get_header_row_and_idx(
            get_data_keys_definition().doctype)
        if row_idx == -1:
            # old style
            return

        dt = None
        for i, d in enumerate(doctype_row[1:]):
            if d not in ("~", "-"):
                if d and doctype_row[i] in (None, '', '~', '-', _("DocType") + ":"):
                    dt, parentfield = d, None
                    # xls format truncates the row, so it may not have more columns
                    if len(doctype_row) > i + 2:
                        parentfield = doctype_row[i + 2]
                    doctypes.append((dt, parentfield))
                    column_idx_to_fieldname[(dt, parentfield)] = {}
                    column_idx_to_fieldtype[(dt, parentfield)] = {}
                if dt:
                    column_idx_to_fieldname[(dt, parentfield)][i + 1] = rows[row_idx + 2][i + 1]
                    column_idx_to_fieldtype[(dt, parentfield)][i + 1] = rows[row_idx + 4][i + 1]

    def get_doc(start_idx):
        if doctypes:
            doc = {}
            attachments = []
            last_error_row_idx = None
            for idx in range(start_idx, len(rows)):
                last_error_row_idx = idx  # pylint: disable=W0612
                if (not doc) or main_doc_empty(rows[idx]):
                    for dt, parentfield in doctypes:
                        d = {}
                        for column_idx in column_idx_to_fieldname[(dt, parentfield)]:
                            try:
                                fieldname = column_idx_to_fieldname[(dt, parentfield)][column_idx]
                                fieldtype = column_idx_to_fieldtype[(dt, parentfield)][column_idx]

                                if not fieldname or not rows[idx][column_idx]:
                                    continue

                                d[fieldname] = rows[idx][column_idx]
                                if fieldtype in ("Int", "Check"):
                                    d[fieldname] = cint(d[fieldname])
                                elif fieldtype in ("Float", "Currency", "Percent"):
                                    d[fieldname] = flt(d[fieldname])
                                elif fieldtype == "Date":
                                    if d[fieldname] and isinstance(d[fieldname], string_types):
                                        d[fieldname] = getdate(parse_date(d[fieldname]))
                                elif fieldtype == "Datetime":
                                    if d[fieldname]:
                                        if " " in d[fieldname]:
                                            _date, _time = d[fieldname].split()
                                        else:
                                            _date, _time = d[fieldname], '00:00:00'
                                        _date = parse_date(_date)
                                        d[fieldname] = get_datetime(_date + " " + _time)
                                    else:
                                        d[fieldname] = None
                                elif fieldtype in ("Image", "Attach Image", "Attach"):
                                    # add the file to the attachments list
                                    attachments.append(d[fieldname])
                                elif fieldtype in ("Link", "Dynamic Link", "Data") and d[fieldname]:
                                    # fields can be saved in number format (long type)
                                    # in the data import template
                                    d[fieldname] = cstr(d[fieldname])
                            except IndexError:
                                pass

                        # scrub quotes from name and modified
                        if d.get("name") and d["name"].startswith('"'):
                            d["name"] = d["name"][1:-1]

                        if sum([0 if not val else 1 for val in d.values()]):
                            d['doctype'] = dt
                            if dt == doctype:
                                doc.update(d)
                            else:
                                if not overwrite and doc.get("name"):
                                    d['parent'] = doc["name"]
                                d['parenttype'] = doctype
                                d['parentfield'] = parentfield
                                doc.setdefault(d['parentfield'], []).append(d)
                else:
                    break

            return doc, attachments, last_error_row_idx
        else:
            doc = dataent._dict(zip(columns, rows[start_idx][1:]))
            doc['doctype'] = doctype
            return doc, [], None

    # used to test whether a row is empty, a parent row or a child row;
    # only the first 3 columns are checked, since the first two columns can be
    # blank, for example when importing an item variant where item code and
    # item name will be blank
    def main_doc_empty(row):
        if row:
            for i in range(3, 0, -1):
                if len(row) > i and row[i]:
                    return False
        return True

    def validate_naming(doc):
        autoname = dataent.get_meta(doctype).autoname
        if autoname:
            if autoname[0:5] == 'field':
                autoname = autoname[6:]
            elif autoname == 'naming_series:':
                autoname = 'naming_series'
            else:
                return True

            if (autoname not in doc) or (not doc[autoname]):
                from dataent.model.base_document import get_controller
                if not hasattr(get_controller(doctype), "autoname"):
                    dataent.throw(_("{0} is a mandatory field").format(autoname))
        return True

    users = dataent.db.sql_list("select name from tabUser")

    def prepare_for_insert(doc):
        # don't block data import if user is not set,
        # e.g. when migrating from another system
        if doc.owner not in users:
            doc.owner = dataent.session.user
        if doc.modified_by not in users:
            doc.modified_by = dataent.session.user

    def is_valid_url(url):
        is_valid = False
        if url.startswith("/files") or url.startswith("/private/files"):
            url = get_url(url)

        try:
            r = requests.get(url)
            is_valid = True if r.status_code == 200 else False
        except Exception:
            pass

        return is_valid

    def attach_file_to_doc(doctype, docname, file_url):
        # check if the attachment is already available
        # and whether the attachment link is relative or not
        if not file_url:
            return
        if not is_valid_url(file_url):
            return

        files = dataent.db.sql(
            """select name from `tabFile`
            where attached_to_doctype=%(doctype)s
            and attached_to_name=%(docname)s
            and (file_url=%(file_url)s or thumbnail_url=%(file_url)s)""",
            {"doctype": doctype, "docname": docname, "file_url": file_url})

        if files:
            # file is already attached
            return

        save_url(file_url, None, doctype, docname, "Home/Attachments", 0)

    # header
    filename, file_extension = ['', '']
    if not rows:
        from dataent.utils.file_manager import get_file  # get_file_doc
        fname, fcontent = get_file(data_import_doc.import_file)
        filename, file_extension = os.path.splitext(fname)

        if file_extension == '.xlsx' and from_data_import == 'Yes':
            from dataent.utils.xlsxutils import read_xlsx_file_from_attached_file
            rows = read_xlsx_file_from_attached_file(file_id=data_import_doc.import_file)
        elif file_extension == '.csv':
            from dataent.utils.csvutils import read_csv_content
            rows = read_csv_content(fcontent, ignore_encoding_errors)
        else:
            dataent.throw(_("Unsupported File Format"))

    start_row = get_start_row()
    header = rows[:start_row]
    data = rows[start_row:]
    try:
        doctype = get_header_row(get_data_keys_definition().main_table)[1]
        columns = filter_empty_columns(
            get_header_row(get_data_keys_definition().columns)[1:])
    except Exception:
        dataent.throw(_("Cannot change header content"))
    doctypes = []
    column_idx_to_fieldname = {}
    column_idx_to_fieldtype = {}

    if skip_errors:
        data_rows_with_error = header

    if submit_after_import and not cint(
            dataent.db.get_value("DocType", doctype, "is_submittable")):
        submit_after_import = False

    parenttype = get_header_row(get_data_keys_definition().parent_table)

    if len(parenttype) > 1:
        parenttype = parenttype[1]

    # check permissions
    if not dataent.permissions.can_import(parenttype or doctype):
        dataent.flags.mute_emails = False
        return {
            "messages": [_("Not allowed to Import") + ": " + _(doctype)],
            "error": True
        }

    # throw an exception if the data file is empty
    check_data_length()
    make_column_map()
    total = len(data)

    if validate_template:
        if total:
            data_import_doc.total_rows = total
        return True

    if overwrite is None:
        overwrite = params.get('overwrite')

    # delete child rows (if parenttype)
    parentfield = None
    if parenttype:
        parentfield = get_parent_field(doctype, parenttype)

        if overwrite:
            delete_child_rows(data, doctype)

    import_log = []

    def log(**kwargs):
        if via_console:
            print((kwargs.get("title") + kwargs.get("message")).encode('utf-8'))
        else:
            import_log.append(kwargs)

    def as_link(doctype, name):
        if via_console:
            return "{0}: {1}".format(doctype, name)
        else:
            return getlink(doctype, name)

    # publish realtime task update
    def publish_progress(achieved, reload=False):
        if data_import_doc:
            dataent.publish_realtime(
                "data_import_progress", {
                    "progress": str(int(100.0 * achieved / total)),
                    "data_import": data_import_doc.name,
                    "reload": reload
                },
                user=dataent.session.user)

    error_flag = rollback_flag = False

    batch_size = dataent.conf.data_import_batch_size or 1000

    for batch_start in range(0, total, batch_size):
        batch = data[batch_start:batch_start + batch_size]

        for i, row in enumerate(batch):
            # bypass empty rows
            if main_doc_empty(row):
                continue

            row_idx = i + start_row
            doc = None

            publish_progress(i)

            try:
                doc, attachments, last_error_row_idx = get_doc(row_idx)
                validate_naming(doc)
                if pre_process:
                    pre_process(doc)

                original = None
                if parentfield:
                    parent = dataent.get_doc(parenttype, doc["parent"])
                    doc = parent.append(parentfield, doc)
                    parent.save()
                else:
                    if overwrite and doc.get("name") and dataent.db.exists(
                            doctype, doc["name"]):
                        original = dataent.get_doc(doctype, doc["name"])
                        original_name = original.name
                        original.update(doc)
                        # preserve original name for case sensitivity
                        original.name = original_name
                        original.flags.ignore_links = ignore_links
                        original.save()
                        doc = original
                    else:
                        if not update_only:
                            doc = dataent.get_doc(doc)
                            prepare_for_insert(doc)
                            doc.flags.ignore_links = ignore_links
                            doc.insert()

                    if attachments:
                        # check file url and create a File document
                        for file_url in attachments:
                            attach_file_to_doc(doc.doctype, doc.name, file_url)
                    if submit_after_import:
                        doc.submit()

                # log results
                if parentfield:
                    log(**{
                        "row": doc.idx,
                        "title": 'Inserted row for "%s"' % (as_link(parenttype, doc.parent)),
                        "link": get_absolute_url(parenttype, doc.parent),
                        "message": 'Document successfully saved',
                        "indicator": "green"
                    })
                elif submit_after_import:
                    log(**{
                        "row": row_idx + 1,
                        "title": 'Submitted row for "%s"' % (as_link(doc.doctype, doc.name)),
                        "message": "Document successfully submitted",
                        "link": get_absolute_url(doc.doctype, doc.name),
                        "indicator": "blue"
                    })
                elif original:
                    log(**{
                        "row": row_idx + 1,
                        "title": 'Updated row for "%s"' % (as_link(doc.doctype, doc.name)),
                        "message": "Document successfully updated",
                        "link": get_absolute_url(doc.doctype, doc.name),
                        "indicator": "green"
                    })
                elif not update_only:
                    log(**{
                        "row": row_idx + 1,
                        "title": 'Inserted row for "%s"' % (as_link(doc.doctype, doc.name)),
                        "message": "Document successfully saved",
                        "link": get_absolute_url(doc.doctype, doc.name),
                        "indicator": "green"
                    })
                else:
                    log(**{
                        "row": row_idx + 1,
                        "title": 'Ignored row for %s' % (row[1]),
                        "link": None,
                        "message": "Document update ignored",
                        "indicator": "orange"
                    })

            except Exception as e:
                error_flag = True

                # build error message
                if dataent.local.message_log:
                    err_msg = "\n".join([
                        '<p class="border-bottom small">{}</p>'.format(
                            json.loads(msg).get('message'))
                        for msg in dataent.local.message_log
                    ])
                else:
                    err_msg = '<p class="border-bottom small">{}</p>'.format(cstr(e))

                error_trace = dataent.get_traceback()
                if error_trace:
                    error_log_doc = dataent.log_error(error_trace)
                    error_link = get_absolute_url("Error Log", error_log_doc.name)
                else:
                    error_link = None

                log(**{
                    "row": row_idx + 1,
                    "title": 'Error for row %s' % (
                        len(row) > 1 and dataent.safe_decode(row[1]) or ""),
                    "message": err_msg,
                    "indicator": "red",
                    "link": error_link
                })

                # collect the rows with errors so a new file can be created;
                # include the errored data in the last row, as
                # last_error_row_idx will not be updated for the last row
                if skip_errors:
                    if last_error_row_idx == len(rows) - 1:
                        last_error_row_idx = len(rows)
                    data_rows_with_error += rows[row_idx:last_error_row_idx]
                else:
                    rollback_flag = True
            finally:
                dataent.local.message_log = []

        start_row += batch_size

    if rollback_flag:
        dataent.db.rollback()
    else:
        dataent.db.commit()

    dataent.flags.mute_emails = False
    dataent.flags.in_import = False

    log_message = {"messages": import_log, "error": error_flag}
    if data_import_doc:
        data_import_doc.log_details = json.dumps(log_message)

        import_status = None
        if error_flag and data_import_doc.skip_errors and len(data) != len(data_rows_with_error):
            import_status = "Partially Successful"
            # write a file containing the faulty rows
            from dataent.utils.file_manager import save_file
            file_name = 'error_' + filename + file_extension
            if file_extension == '.xlsx':
                from dataent.utils.xlsxutils import make_xlsx
                xlsx_file = make_xlsx(data_rows_with_error, "Data Import Template")
                file_data = xlsx_file.getvalue()
            else:
                from dataent.utils.csvutils import to_csv
                file_data = to_csv(data_rows_with_error)
            error_data_file = save_file(file_name, file_data, "Data Import",
                data_import_doc.name, "Home/Attachments")
            data_import_doc.error_file = error_data_file.file_url
        elif error_flag:
            import_status = "Failed"
        else:
            import_status = "Successful"

        data_import_doc.import_status = import_status
        data_import_doc.save()
        if data_import_doc.import_status in ["Successful", "Partially Successful"]:
            data_import_doc.submit()
            publish_progress(100, True)
        else:
            publish_progress(0, True)
        dataent.db.commit()
    else:
        return log_message