Esempio n. 1
0
def load_table(table, source = "default/test-0.jsonlines",
			   modifier = "" , dblogin = "******" ,
			   as_name = None ):
	"""Bulk-insert the rows of ``table`` into a MongoDB collection.

	The target database and collection names are obtained from
	``parse_path(filename, modifier)``, where ``filename`` is ``as_name``
	when it is given and ``source`` otherwise.

	Documents are inserted with ``ordered=False``. If the bulk insert
	raises ``BulkWriteError``, every failed document is appended (once per
	``_id``) together with its error message to ``"<filename>.errors"``
	in JSON-lines form.

	Args:
		table: Row container handed to ``odicts``; each yielded object is
			passed through ``set_id`` before insertion (presumably to
			assign its ``_id`` -- confirm against ``set_id``).
		source: Path-like name of the data source.
		modifier: Second argument forwarded to ``parse_path``.
		dblogin: Connection string/host passed to ``pymongo.MongoClient``.
		as_name: Optional name that replaces ``source`` when deriving the
			database/collection names and the errors-file path.

	Returns:
		Whatever ``collection.insert_many`` returns on success, or the
		``details`` dict of the ``BulkWriteError`` on failure.
	"""
	filename = source if as_name is None else as_name
	dbname, collectioname = parse_path(filename, modifier)
	connection = pymongo.MongoClient(dblogin)
	try:
		# Item access instead of getattr(): attribute access returns the
		# client's/database's own members when a name collides with them.
		collection = connection[dbname][collectioname]
		result = collection.insert_many(
			(set_id(obj) for obj in odicts(table)), ordered=False)
	except BulkWriteError as e:
		result = e.details
		seen_ids = set()
		# Binary append mode: assuming this is Scrapy's JsonLinesItemExporter,
		# it writes encoded bytes and fails on a text-mode file in Python 3.
		with open("%s.%s" % (filename, "errors"), "ab") as f:
			exporter = JsonLinesItemExporter(f)
			exporter.start_exporting()
			for err in result.get("writeErrors") or ():
				op = err.get("op")
				doc_id = op.get("_id")
				# Report each failed document only once.
				if doc_id in seen_ids:
					continue
				seen_ids.add(doc_id)
				exporter.export_item(dict(item=op, error=err.get("errmsg")))
			exporter.finish_exporting()
	finally:
		# Release the client's sockets whether or not the insert succeeded.
		connection.close()
	return result
Esempio n. 2
0
def load(filename,modifier="raw",dblogin = "******", as_name = None ):
	"""Read ``filename`` and load its contents into MongoDB.

	The file is read with ``scrapingtools.etl.calls.read`` and handed to
	``load_table``. ``load_table`` derives the database/collection names
	and the errors-file path from ``as_name`` when it is given (otherwise
	from ``filename``), so the values returned here are computed from that
	same effective name.

	Args:
		filename: Path of the file to read and load.
		modifier: Second argument forwarded to ``parse_path``.
		dblogin: Connection string/host forwarded to ``load_table``.
		as_name: Optional name to load the data under instead of
			``filename``.

	Returns:
		A ``(dbname, collectionname, errs)`` tuple, where ``errs`` is the
		path of the ``.errors`` file if it exists on disk, else ``None``.
		NOTE(review): the errors file is opened in append mode by
		``load_table``, so a file left over from an earlier run is
		reported as well.
	"""
	from scrapingtools.etl.calls import read
	# Same effective name that load_table uses for naming and error output.
	target = filename if as_name is None else as_name
	table = read(filename)
	dbname, collectionname = parse_path(target, modifier)
	load_table(table, source=filename, modifier=modifier, dblogin=dblogin,
		as_name=as_name)
	errs = "%s.%s" % (target, "errors")
	if not os.path.exists(errs):
		errs = None
	return dbname, collectionname, errs