def import_one_resource(line):
    """
    Import the single resource serialized in ``line``.

    This function lives at module level, outside of the
    BusinessDataImporter class, so that it can be called with
    multiprocessing.
    """
    # Presumably drops connections inherited from the parent process so the
    # worker opens its own -- confirm against the caller.
    connections.close_all()

    file_reader = ArchesFileReader()
    resource = JSONDeserializer().deserialize(line)
    payload = {"resources": [resource]}
    file_reader.import_business_data(payload)
def import_business_data(self, file_format=None, business_data=None, mapping=None,
                         overwrite='append', bulk=False, create_concepts=False,
                         create_collections=False):
    """
    Import business data with the reader that matches ``file_format``.

    ``file_format``, ``business_data`` and ``mapping`` each fall back to the
    attribute of the same name on ``self`` when passed as ``None``.

    - 'json' data is imported with ArchesFileReader.
    - 'csv', 'shp' and 'zip' data is imported with CsvReader and requires a
      mapping; without one an error banner is printed and ``sys.exit()`` is
      called.
    - Any other format is reported as unsupported and nothing is imported.

    ``overwrite``, ``bulk``, ``create_concepts`` and ``create_collections``
    are only forwarded to CsvReader.

    Whether or not the import succeeds, ``after_update_all()`` is called on
    the datatype instance of every DDataType.
    """
    reader = None
    start = time()
    try:
        if file_format is None:
            file_format = self.file_format
        if business_data is None:
            business_data = self.business_data
        if mapping is None:
            mapping = self.mapping

        if file_format == 'json':
            reader = ArchesFileReader()
            reader.import_business_data(business_data, mapping)
        elif file_format in ('csv', 'shp', 'zip'):
            if mapping is None:
                print('*' * 80)
                print('ERROR: No mapping file detected. Please indicate one with the \'-c\' paramater or place one in the same directory as your business data.')
                print('*' * 80)
                # SystemExit still passes through the finally clause below.
                sys.exit()
            reader = CsvReader()
            reader.import_business_data(
                business_data=business_data,
                mapping=mapping,
                overwrite=overwrite,
                bulk=bulk,
                create_concepts=create_concepts,
                create_collections=create_collections)

        elapsed = (time() - start)
        print('Time to import_business_data = {0}'.format(
            datetime.timedelta(seconds=elapsed)))

        if reader is None:
            # No branch above matched, so there is no reader to report on.
            print('ERROR: Unsupported file format: {0}'.format(file_format))
        else:
            reader.report_errors()
    finally:
        datatype_factory = DataTypeFactory()
        for datatype in DDataType.objects.all():
            datatype_instance = datatype_factory.get_instance(datatype.datatype)
            datatype_instance.after_update_all()
def import_business_data(self, file_format=None, business_data=None, mapping=None,
                         overwrite='append', bulk=False):
    """
    Import business data with the reader that matches ``file_format``.

    ``file_format``, ``business_data`` and ``mapping`` each fall back to the
    attribute of the same name on ``self`` when passed as ``None``.

    - 'json' data is imported with ArchesFileReader.
    - 'csv' data is imported with CsvReader and requires a mapping; without
      one an error banner is printed and ``sys.exit()`` is called.
    - Any other format is reported as unsupported and nothing is imported.

    ``overwrite`` and ``bulk`` are only forwarded to CsvReader.

    Whether or not the import succeeds, ``after_update_all()`` is called on
    the datatype instance of every DDataType.
    """
    reader = None
    start = time()
    try:
        if file_format is None:
            file_format = self.file_format
        if business_data is None:
            business_data = self.business_data
        if mapping is None:
            mapping = self.mapping

        if file_format == 'json':
            reader = ArchesFileReader()
            reader.import_business_data(business_data, mapping)
        elif file_format == 'csv':
            if mapping is None:
                print('*'*80)
                print('ERROR: No mapping file detected. Please indicate one with the \'-c\' paramater or place one in the same directory as your business data.')
                print('*'*80)
                # SystemExit still passes through the finally clause below.
                sys.exit()
            reader = CsvReader()
            reader.import_business_data(
                business_data=business_data,
                mapping=mapping,
                overwrite=overwrite,
                bulk=bulk)

        elapsed = (time() - start)
        print('Time to import_business_data = {0}'.format(
            datetime.timedelta(seconds=elapsed)))

        if reader is None:
            # No branch above matched, so there is no reader to report on.
            print('ERROR: Unsupported file format: {0}'.format(file_format))
        else:
            reader.report_errors()
    finally:
        datatype_factory = DataTypeFactory()
        for datatype in DDataType.objects.all():
            datatype_instance = datatype_factory.get_instance(datatype.datatype)
            datatype_instance.after_update_all()
def import_business_data(self, file_format=None, business_data=None, mapping=None,
                         overwrite='append', bulk=False, create_concepts=False,
                         create_collections=False, use_multiprocessing=False):
    """
    Import business data with the reader that matches ``file_format``.

    ``file_format``, ``business_data`` and ``mapping`` each fall back to the
    attribute of the same name on ``self`` when passed as ``None``.

    - 'json' data is imported with ArchesFileReader.
    - 'jsonl' data is read line by line from ``self.file[0]`` (not from
      ``business_data``); blank lines are skipped and every other line is
      imported as one resource. When ``use_multiprocessing`` is ``True`` the
      lines are handed to ``import_one_resource`` through a process pool with
      one worker per CPU; otherwise they are imported sequentially.
    - 'csv', 'shp' and 'zip' data is imported with CsvReader and requires a
      mapping; without one an error banner is printed and ``sys.exit()`` is
      called.
    - Any other format is reported as unsupported and nothing is imported.

    ``overwrite``, ``bulk``, ``create_concepts`` and ``create_collections``
    are only forwarded to CsvReader.

    Whether or not the import succeeds, ``after_update_all()`` is called on
    the datatype instance of every DDataType.
    """
    reader = None
    start = time()
    try:
        if file_format is None:
            file_format = self.file_format
        if business_data is None:
            business_data = self.business_data
        if mapping is None:
            mapping = self.mapping

        if file_format == 'json':
            reader = ArchesFileReader()
            reader.import_business_data(business_data, mapping)
        elif file_format == 'jsonl':
            with open(self.file[0], 'rU') as openf:
                # Whitespace-only lines carry no resource; skip them.
                lines = [line for line in openf if line.strip()]
            if use_multiprocessing is True:
                pool = Pool(cpu_count())
                try:
                    pool.map(import_one_resource, lines)
                finally:
                    # Always release the worker processes, even if a
                    # worker raised.
                    pool.close()
                    pool.join()
                connections.close_all()
                # NOTE(review): this reader did not perform the imports, so
                # report_errors() below presumably has nothing from the
                # workers to report -- confirm.
                reader = ArchesFileReader()
            else:
                reader = ArchesFileReader()
                for line in lines:
                    archesresource = JSONDeserializer().deserialize(line)
                    reader.import_business_data(
                        {"resources": [archesresource]})
        elif file_format in ('csv', 'shp', 'zip'):
            if mapping is None:
                print('*' * 80)
                print('ERROR: No mapping file detected. Please indicate one with the \'-c\' paramater or place one in the same directory as your business data.')
                print('*' * 80)
                # SystemExit still passes through the finally clause below.
                sys.exit()
            reader = CsvReader()
            reader.import_business_data(
                business_data=business_data,
                mapping=mapping,
                overwrite=overwrite,
                bulk=bulk,
                create_concepts=create_concepts,
                create_collections=create_collections)

        elapsed = (time() - start)
        print('Time to import_business_data = {0}'.format(
            datetime.timedelta(seconds=elapsed)))

        if reader is None:
            # No branch above matched, so there is no reader to report on.
            print('ERROR: Unsupported file format: {0}'.format(file_format))
        else:
            reader.report_errors()
    finally:
        datatype_factory = DataTypeFactory()
        for datatype in DDataType.objects.all():
            datatype_instance = datatype_factory.get_instance(datatype.datatype)
            datatype_instance.after_update_all()