def load_table(table, source="default/test-0.jsonlines", modifier="", dblogin="******", as_name=None):
    """Bulk-insert *table* into MongoDB, logging per-document write errors.

    Parameters
    ----------
    table : iterable
        Rows to insert; each row is passed through ``odicts`` and given an
        ``_id`` by ``set_id`` before insertion.
    source : str
        Path fed to ``parse_path`` to derive the database and collection
        names, and the base name of the ``<name>.errors`` log file.
    modifier : str
        Extra token forwarded to ``parse_path``.
    dblogin : str
        MongoDB connection URI / credentials.
    as_name : str or None
        When given, overrides *source* for naming and error logging.

    Returns
    -------
    The ``insert_many`` result on full success, or the
    ``BulkWriteError.details`` dict when some documents were rejected.
    """
    filename = source if as_name is None else as_name
    dbname, collection_name = parse_path(filename, modifier)
    # BUG FIX: the client was never closed before -- the context manager
    # guarantees the connection pool is released even if insert_many raises
    # something other than BulkWriteError.
    with pymongo.MongoClient(dblogin) as connection:
        collection = getattr(getattr(connection, dbname), collection_name)
        try:
            # ordered=False lets the valid documents land even when some
            # fail; all failures are then reported together in the
            # BulkWriteError details.
            result = collection.insert_many(
                (set_id(obj) for obj in odicts(table)), ordered=False
            )
        except BulkWriteError as e:
            result = e.details
            seen_ids = set()
            with open("%s.%s" % (filename, "errors"), "a") as f:
                exporter = JsonLinesItemExporter(f)
                exporter.start_exporting()
                # Log each failing document at most once, keyed by its _id.
                # "or []" guards against a missing/None writeErrors entry.
                for err in result.get("writeErrors") or []:
                    doc_id = err.get("op").get("_id")
                    if doc_id not in seen_ids:
                        seen_ids.add(doc_id)
                        exporter.export_item(
                            dict(item=err.get("op"), error=err.get("errmsg"))
                        )
                exporter.finish_exporting()
                # NOTE: the explicit f.close() that was here is gone -- the
                # with-block already closes the file; closing twice was a bug.
    return result
def load(filename, modifier="raw", dblogin="******", as_name=None):
    """Read a jsonlines file from disk and bulk-load it into MongoDB.

    Parameters mirror ``load_table``; *filename* is both the file read from
    disk and the default source for database/collection naming.

    Returns
    -------
    (dbname, collectionname, errs) : tuple
        *errs* is the path of the ``.errors`` log written by ``load_table``,
        or ``None`` when no such log exists on disk.
    """
    from scrapingtools.etl.calls import read

    table = read(filename)
    dbname, collectionname = parse_path(filename, modifier)
    # Keep the insert result in its own name instead of clobbering `table`
    # (the old code rebound `table` to the result object, which was
    # misleading -- the value is not a table).
    result = load_table(
        table, source=filename, modifier=modifier, dblogin=dblogin, as_name=as_name
    )
    # BUG FIX: load_table names its error file after as_name when one is
    # given, so the existence check must use the same base name. The old
    # code always checked "<filename>.errors" and could miss the real log.
    error_base = filename if as_name is None else as_name
    errs = "%s.%s" % (error_base, "errors")
    if not os.path.exists(errs):
        errs = None
    return dbname, collectionname, errs